PageRenderTime 46ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/com/atlassian/uwc/converters/mindtouch/LinkParser.java

https://bitbucket.org/bowmanb1/universal-wiki-converter
Java | 199 lines | 178 code | 14 blank | 7 comment | 48 complexity | c8a146d5320da7699d955e267eca906c MD5 | raw file
  1. package com.atlassian.uwc.converters.mindtouch;
  2. import java.io.File;
  3. import java.io.FileFilter;
  4. import java.util.regex.Matcher;
  5. import java.util.regex.Pattern;
  6. import org.apache.log4j.Logger;
  7. import org.xml.sax.Attributes;
  8. import org.xml.sax.SAXException;
  9. import com.atlassian.uwc.converters.tikiwiki.RegexUtil;
  10. import com.atlassian.uwc.converters.xml.DefaultXmlParser;
  11. import com.atlassian.uwc.filters.NoSvnFilter;
  12. import com.atlassian.uwc.ui.Page;
  13. public class LinkParser extends DefaultXmlParser {
  14. private static String target = "";
  15. private static String alias = "";
  16. private static String parent = "";
  17. private NoSvnFilter nosvnfilter = new NoSvnFilter();
  18. Logger log = Logger.getLogger(this.getClass());
  19. public enum Type {
  20. INTERNAL,
  21. EXTERNAL;
  22. public static Type getType(Attributes attributes) {
  23. String val = attributes.getValue("rel");
  24. if (val == null) return null;
  25. if (val.contains("internal")) return INTERNAL;
  26. if (val.contains("external")) return EXTERNAL;
  27. return null;
  28. }
  29. }
  30. Pattern hrefend = Pattern.compile("[^\\/]+$");
  31. private static boolean isImage = false;
  32. public void startElement(String uri, String localName, String qName, Attributes attributes) {
  33. String href = attributes.getValue("href");
  34. Type type = Type.getType(attributes);
  35. switch (type) {
  36. case INTERNAL:
  37. isImage = isImage(href);
  38. if (isImage) {
  39. getImageTarget(href);
  40. }
  41. else {
  42. Matcher endFinder = hrefend.matcher(href);
  43. target = endFinder.find()?endFinder.group():href;
  44. if (target.contains("_")) target = fixUnderscores(target, href, getPage());
  45. }
  46. break;
  47. case EXTERNAL:
  48. target = href;
  49. }
  50. }
  51. Pattern parents = Pattern.compile("http://[^\\/]+\\/(.*)$", Pattern.DOTALL);
  52. protected String fixUnderscores(String input, String href, Page page) {
  53. if (getProperties() != null && getProperties().containsKey("exportdir")) {
  54. String exportdir = getProperties().getProperty("exportdir", null);
  55. if (exportdir == null) return input;
  56. File export = new File(exportdir);
  57. if (!export.exists() || !export.isDirectory()) {
  58. log.warn("exportdir does not exist or is not a directory. " + exportdir);
  59. return input;
  60. }
  61. Matcher parentFinder = parents.matcher(href);
  62. if (parentFinder.find()) {
  63. String parentString = parentFinder.group(1);
  64. String[] parentArray = parentString.split("/");
  65. File[] exportfiles = export.listFiles(nosvnfilter);
  66. File rootFile = null;
  67. //get list of export pages to work with (ignore mindtouch root page, if it's there)
  68. boolean foundMindtouchAsRoot = false;
  69. for (File file : exportfiles) {
  70. if (file.isDirectory() && file.getName().endsWith("_MindTouch_subpages")) {
  71. foundMindtouchAsRoot = true;
  72. rootFile = file;
  73. break; //should only be one
  74. }
  75. }
  76. File[] rootFiles;
  77. if (!foundMindtouchAsRoot) rootFiles = exportfiles;
  78. else rootFiles = rootFile.listFiles(nosvnfilter);
  79. //walk the tree to get the leaf file for the input/href/page
  80. File leafFile = getFile(parentArray, rootFiles);
  81. if (leafFile == null) return input;
  82. return fixUnderscores(input, leafFile);
  83. }
  84. else return input;
  85. }
  86. return input;
  87. }
  88. private File getFile(String[] ancestors, File[] thisdir) {
  89. if (thisdir == null) return null;
  90. String current = ancestors[0];
  91. File found = null;
  92. //find the file associated with the top ancestor
  93. for (File file : thisdir) {
  94. String cleaned = current.replaceAll("_", "");
  95. cleaned = cleaned.replaceAll("[%]\\w\\w", ""); //hack: remove url encoding
  96. String regex = "^\\d+\\Q"+cleaned+"\\E\\.xml$";
  97. String noUS = file.getName().replaceAll("_", "");
  98. if (noUS.matches(regex)) {
  99. found = file;
  100. break;
  101. }
  102. }
  103. if (ancestors.length == 1) return found;
  104. if (found == null) return null;
  105. //get branch of ancestors
  106. int len = ancestors.length;
  107. String[] branch = new String[len-1];
  108. System.arraycopy(ancestors, 1, branch, 0, branch.length);
  109. //get subpages dir
  110. File subdir = getSubpagesDir(found);
  111. //recurse - walk the tree
  112. return getFile(branch, subdir.listFiles(nosvnfilter));
  113. }
  114. private File getSubpagesDir(File input) {
  115. String dir = input.getAbsolutePath().replaceFirst("\\.xml$", "_subpages");
  116. return new File(dir);
  117. }
  118. Pattern untilUS = Pattern.compile("([^_]*)(_)");
  119. protected String fixUnderscores(String input, File file) {
  120. //places where input has underscore, but file doesn't need spaces
  121. String current = file.getName().replaceFirst("^\\d+_", "");
  122. current = current.replaceFirst("\\.xml$", "");
  123. Matcher usFinder = untilUS.matcher(input);
  124. StringBuffer sb = new StringBuffer();
  125. boolean found = false;
  126. while (usFinder.find()) {
  127. found = true;
  128. String part = usFinder.group();
  129. if (current.startsWith(part)) {
  130. if (current.length()>=part.length()) {
  131. current = current.substring(part.length());
  132. continue;
  133. }
  134. else break;
  135. }
  136. String replacement = usFinder.group(1) + " ";
  137. replacement = RegexUtil.handleEscapesInReplacement(replacement);
  138. usFinder.appendReplacement(sb, replacement);
  139. int minus = (usFinder.group(1)).length();
  140. if (current.length()>=minus) {
  141. current = current.substring(minus);
  142. }
  143. }
  144. if (found) {
  145. usFinder.appendTail(sb);
  146. return sb.toString();
  147. }
  148. return input;
  149. }
  150. Pattern image = new ImageParser().filename;
  151. protected boolean isImage(String input) {
  152. Matcher imageFinder = image.matcher(input);
  153. return imageFinder.find();
  154. }
  155. protected String getImageTarget(String input) {
  156. Matcher imageFinder = image.matcher(input);
  157. if (imageFinder.find()) {
  158. target = imageFinder.group(1);
  159. parent = imageFinder.group(2);
  160. return target;
  161. }
  162. target = input;
  163. return input;
  164. }
  165. public void endElement(String uri, String localName, String qName) {
  166. if (target.equals(alias)) alias = "";
  167. else alias += "|";
  168. String link;
  169. if (isImage) {
  170. if (parent == null || getPage() == null || parent.equals(getPage().getName()))
  171. target = "^" + target;
  172. else
  173. target = parent + "^" + target;
  174. }
  175. link = "[" + alias + target + "]";
  176. appendOutput(link);
  177. alias = target = "";
  178. isImage = false;
  179. }
  180. public void characters(char[] ch,
  181. int start,
  182. int length) throws SAXException {
  183. alias += String.copyValueOf(ch, start, length);
  184. }
  185. }