/src/com/atlassian/uwc/converters/dokuwiki/DokuWikiLinkConverter.java
Java | 211 lines | 121 code | 23 blank | 67 comment | 21 complexity | 86abf1dcdd308477fb5c1209a2f69e42 MD5 | raw file
- package com.atlassian.uwc.converters.dokuwiki;
- import com.atlassian.uwc.ui.ConverterEngine;
- import com.atlassian.uwc.ui.Page;
- import com.atlassian.uwc.converters.BaseConverter;
- import org.apache.log4j.Logger;
- import java.net.URLEncoder;
- import java.io.UnsupportedEncodingException;
- /**
- * A custom converter to turn DokuWiki's links into Confluence page names.
- *
- * <strong>NOTE:</strong> This class is heavily dependent on the page name
- * set by ConverterEngine.setupPages(). Any change there will probably force a change here.
- *
- * @author Rex (Rolf Staflin)
- * @version $Id$
- */
- public class DokuWikiLinkConverter extends BaseConverter {
- private static Logger log = Logger.getLogger(DokuWikiLinkConverter.class);
- private static final String LINK_START = "[[";
- private static final String LINK_END = "]]";
- private static final String SEPARATOR = "|";
- /**
- * These are assumed to be protocols rather than DokuWiki namespaces.
- */
- private static final String[] protocols = {
- "file",
- "http",
- "https",
- "ftp",
- "mailto",
- "svn"
- };
- /**
- * Converts any links from the DokuWiki format to Confluence's format. Any links pointing to
- * other documents in the wiki are massaged further so that they point to the correct page title.
- * @param page A page with text to be converted.
- */
- public void convert(Page page) {
- assert page != null;
- assert page.getOriginalText() != null;
- String text = page.getOriginalText();
- int linkStart = text.indexOf(LINK_START);
- while (linkStart >= 0) {
- int linkEnd = text.indexOf(LINK_END, linkStart);
- if (linkEnd < 0) {
- break;
- }
- String link = text.substring(linkStart + 2, linkEnd);
- int separator = link.indexOf(SEPARATOR);
- String linkText = null;
- String linkTarget = link.trim();
- if (separator >= 0) {
- linkText = link.substring(separator + 1).trim();
- linkTarget = link.substring(0, separator).trim();
- // Remove any line breaks from the link text
- linkText = linkText.replaceAll("\r\n", " ");
- linkText = linkText.replaceAll("\r", " ");
- linkText = linkText.replaceAll("\n", " ");
- }
- if (isPageReference(linkTarget)) {
- // First of all, this may be a local reference (e.g., from
- // foo:bar you can link to foo:baz with [[baz]], and we need
- // to change that into [foo -- baz] because that's what the
- // baz page will have been renamed to.
- if (linkTarget.indexOf(":") < 0) {
- // Get the name space from the current page name.
- int lastSeparator = page.getName().lastIndexOf(ConverterEngine.CONFLUENCE_SEPARATOR);
- if (lastSeparator >= 0) {
- linkTarget = page.getName().substring(0, lastSeparator +
- ConverterEngine.CONFLUENCE_SEPARATOR.length()) +
- linkTarget;
- }
- } else {
- // Replace colons with the separator used in naming the pages.
- linkTarget = linkTarget.replaceAll(":", ConverterEngine.CONFLUENCE_SEPARATOR);
- }
- // Replace underscores with spaces
- linkTarget = linkTarget.replaceAll("_", " ");
- } else {
- linkTarget = normalizeLink(linkTarget);
- }
- StringBuffer newText = new StringBuffer("[");
- if (linkText != null) {
- newText.append(linkText);
- } else {
- newText.append(link);
- }
- newText.append(SEPARATOR).append(linkTarget);
- newText.append("]");
- text = text.substring(0, linkStart) + newText.toString() + text.substring(linkEnd + LINK_END.length());
- linkStart = text.indexOf(LINK_START);
- }
- page.setConvertedText(text);
- // Lastly, we update the page name
- formatPageName(page);
- }
- /**
- * "Normalizes" a link by doing the following:
- *
- * <li>Replacing all backslashes with forward slashes
- * (otherwise Confluence strips them from the links)
- * <li>Replacing spaces with "+"
- * <li>Changing the protocol file: into http: (file: does not seem to work)
- * <li>Adding the protocol http: to links starting with "//"
- * </ul>
- * @param linkTarget the link to be normalized
- * @return The normalized string
- */
- public static String normalizeLink(String linkTarget) {
- assert linkTarget != null;
- linkTarget = linkTarget.replaceAll("\\\\", "/");
- /* linkTarget = linkTarget.replaceAll("?", "%C3%A5");
- linkTarget = linkTarget.replaceAll("?", "%C3%A4");
- linkTarget = linkTarget.replaceAll("?", "%C3%B6");
- linkTarget = linkTarget.replaceAll("?", "%C3%85");
- linkTarget = linkTarget.replaceAll("?", "%C3%84");
- linkTarget = linkTarget.replaceAll("?", "%C3%96");
- linkTarget = linkTarget.replaceAll(" ", "+");
- */
- if (linkTarget.startsWith("file:")) {
- linkTarget = "http:" + linkTarget.substring(5);
- }
- if (linkTarget.startsWith("//")) {
- linkTarget = "http:" + linkTarget;
- }
- try {
- linkTarget = URLEncoder.encode(linkTarget, "UTF-8");
- } catch (UnsupportedEncodingException ignored) {
- log.error("Could not URL-encode target!", ignored);
- }
- // Now the encoder has ruined the colons and slashes :P. Fix that.
- linkTarget = linkTarget.replaceAll("%3A", ":");
- linkTarget = linkTarget.replaceAll("%2F", "/");
- return linkTarget;
- }
- /**
- * Makes the page name prettier by removing the file name extension,
- * replacing underscores with spaces and finally converting the first
- * character into upper case. E.g., "my_page.txt" is converted into "My page".
- * @param page A page with the name set.
- */
- private void formatPageName(Page page) {
- assert page != null;
- assert page.getName() != null;
- String name = page.getName();
- // Strip trailing file name extension.
- if (name.endsWith(".txt")) {
- name = name.substring(0, name.length()-4);
- }
- // Replace underscores with spaces
- name = name.replaceAll("_", " ");
- // Casify the name
- name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
- page.setName(name);
- }
- /**
- * Determines if a link is a DokuWiki page reference or not.
- * Page references have the form [dir:][name] with one or more
- * [dir:] components, e.g., "path:to:a:page".
- *
- * The problem is that regular URL:s also contain colons; "mailto:foo" is not
- * a page reference, but "mail:foo" is. This method checks for some standard
- * protocol names and assumes that links that contain colons but do not start with
- * one of the protocol names are page references.
- *
- * To find out what protocols your DokuWiki contains, run this in a command prompt
- * at the wiki base document directory:
- * grep -ohr "\[\[[0-9a-zA-Z]\*:" * | sort | uniq
- * Look through the resulting list and eliminate the matches that are DokuWiki name spaces.
- * The rest are protocols.
- *
- * @param target The link text.
- * @return True if and only if the text is a page reference.
- */
- public static boolean isPageReference(String target) {
- assert target != null;
- int colon = target.indexOf(':');
- if (colon < 0) {
- return true; // No colon in the string -- must be a local reference!
- }
- for (String protocol : protocols) {
- if (target.startsWith(protocol)) {
- return false; // This target uses an approved protocol
- }
- }
- return true;
- }
- }