PageRenderTime 36ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/src/com/atlassian/uwc/converters/jotspot/AttachmentConverter.java

https://bitbucket.org/bowmanb1/universal-wiki-converter
Java | 310 lines | 272 code | 16 blank | 22 comment | 3 complexity | f0cf3c92fa1dc86c7367b877d1b1ec2c MD5 | raw file
  1. package com.atlassian.uwc.converters.jotspot;
  2. import java.io.File;
  3. import java.io.FileNotFoundException;
  4. import java.io.UnsupportedEncodingException;
  5. import java.net.URLEncoder;
  6. import java.nio.ByteBuffer;
  7. import java.nio.CharBuffer;
  8. import java.nio.charset.CharacterCodingException;
  9. import java.nio.charset.Charset;
  10. import java.nio.charset.CharsetEncoder;
  11. import java.util.Scanner;
  12. import java.util.Vector;
  13. import java.util.regex.Matcher;
  14. import java.util.regex.Pattern;
  15. import org.apache.log4j.Logger;
  16. import com.atlassian.uwc.converters.BaseConverter;
  17. import com.atlassian.uwc.ui.ConfluenceSettingsForm;
  18. import com.atlassian.uwc.ui.Page;
  19. /**
  20. * Prepares jotspot attachments for uploading to Confluence.
  21. * NOTE: This class was heavily influenced by TwikiPrepareAttachmentFilesConverter
  22. * @author Laura Kolker
  23. */
  24. public class AttachmentConverter extends BaseConverter {
  25. private static final String JOTSPOT_IMAGE_DIR = "/_data/";
  26. private static final String JOTSPOT_IMAGE_DATA = "attach.URI.dat";
  27. Logger log = Logger.getLogger(this.getClass());
  28. public void convert(Page page) {
  29. log.debug("Converting Jotspot Attachments -- starting");
  30. // scan the page and create a list of attachments
  31. addAttachmentsToPage(page);
  32. log.debug("Converting Jotspot Attachments -- complete");
  33. }
  34. /**
  35. * looks for attachments and attaches them
  36. * @param page object to attach pages to
  37. */
  38. protected void addAttachmentsToPage(Page page) {
  39. //two types of attachments, images and attachments
  40. //images
  41. Vector<String> imagepaths = getImagePaths(page.getOriginalText());
  42. attach(imagepaths, page);
  43. //attachments
  44. Vector<String> attachmentPaths = getAttachmentPaths(page);
  45. attach(attachmentPaths, page);
  46. }
  47. /**
  48. * attaches given String Vector of paths to page
  49. * @param paths
  50. * @param page
  51. */
  52. private void attach(Vector<String> paths, Page page) {
  53. //foreach path in paths
  54. for (String path : paths) {
  55. //get the complete path to the file
  56. log.debug("complete path = " + path);
  57. //confirm existance of file
  58. File file = new File(path);
  59. if (!file.exists() || file.isDirectory())
  60. continue;
  61. //attach the file
  62. log.debug("adding attachment: " + file.getName());
  63. page.addAttachment(file);
  64. }
  65. }
  66. String correctPath = "(.*\\/([^/]+)\\" + JOTSPOT_IMAGE_DIR + ")";
  67. Pattern correctPathPattern = Pattern.compile(correctPath);
  68. /**
  69. * changes the jotspot default imagename (which is a .dat) to the
  70. * actual image name, by examining the path to said image
  71. * @param path default path
  72. * <br>For example:
  73. * /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/attach.URI.dat
  74. * @return useful path
  75. * <br/>For example:
  76. * /attachDirectory/System/TmpImageUpload/hobbespounce.gif/_data/hobbespounce.gif
  77. */
  78. private String getUsefulImagePath(String path) {
  79. log.debug("renaming path = " + path);
  80. Matcher correctPathFinder = correctPathPattern.matcher(path);
  81. if (correctPathFinder.lookingAt()) {
  82. String correctName = correctPathFinder.group(2);
  83. String correctDir = correctPathFinder.group(1);
  84. //check for ascii char encodings (only encode directory
  85. //as wiki syntax won't find encoded character filenames)
  86. correctDir = encodeChars(correctDir);
  87. path = correctDir + correctName;
  88. log.debug("correct path = " + path);
  89. }
  90. return path;
  91. }
  92. Pattern nonfilenameChars = Pattern.compile("[^-\\w+ \\.\\/\\\\]");
  93. /**
  94. * checks for non filename safe characters (like , (comma) for example)
  95. * and encodes thems.
  96. * @param string For example:
  97. * thisStringHasA,comma
  98. * @return encoded nonword characters, For example:
  99. * thisStringHasA%2Ccomma
  100. */
  101. protected String encodeChars(String string) {
  102. String encoding = "UTF-8";//FIXME long term, provide property for this?
  103. Matcher nonwordcharFinder = nonfilenameChars.matcher(string);
  104. StringBuffer sb = new StringBuffer();
  105. //find the nonfilename characters
  106. while (nonwordcharFinder.find()) {
  107. String badChar = nonwordcharFinder.group();
  108. String encoded = null;
  109. //and encode them.
  110. try {
  111. encoded = URLEncoder.encode(badChar, encoding);
  112. } catch (UnsupportedEncodingException e) {
  113. log.error("Problem with encoding: " + encoding);
  114. e.printStackTrace();
  115. }
  116. nonwordcharFinder.appendReplacement(sb, encoded);
  117. }
  118. nonwordcharFinder.appendTail(sb);
  119. String encodedStr = sb.toString();
  120. if (encodedStr == null || "".equals(encodedStr))
  121. encodedStr = string;
  122. return encodedStr;
  123. }
  124. String imgSrc = "<img src=\"([^\"]+)\"[^/]*/>";
  125. Pattern imgPattern = Pattern.compile(imgSrc);
  126. /**
  127. * @param input page text
  128. * @return list of absolute paths to images
  129. */
  130. private Vector<String> getImagePaths(String input) {
  131. Vector<String> paths = new Vector<String>();
  132. //get paths from img src using regex
  133. Matcher imgFinder = imgPattern.matcher(input);
  134. String root = this.getAttachmentDirectory();
  135. while (imgFinder.find()) {
  136. String path = imgFinder.group(1);
  137. if (!path.startsWith("\\/")) path = "/" + path;
  138. path = root + path; //this is actually an image directory !
  139. String tmpPath = path + JOTSPOT_IMAGE_DIR + JOTSPOT_IMAGE_DATA; //here's where the image is
  140. String toPath = getUsefulImagePath(tmpPath); //but that's a useless name for the file, so we change it
  141. tmpPath = encodeChars(tmpPath); //only encode directory, as wiki syntax won't find encoded character filenames
  142. File tmpFile = new File(tmpPath);
  143. File toFile = new File(toPath);
  144. if (!tmpFile.renameTo(toFile)) { //this happens when there's something there already
  145. log.debug("Can't rename " + tmpFile + " to " + toFile);
  146. }
  147. log.debug("getImagePaths path = " + toPath);
  148. paths.add(toPath);
  149. }
  150. return paths;
  151. }
  152. /**
  153. * figures out what attachments are associated with this page.
  154. * @param page
  155. * @return String vector of absolute paths to attachments
  156. */
  157. private Vector<String> getAttachmentPaths(Page page) {
  158. Vector<String> paths = new Vector<String>();
  159. //get the directory with the same name as this page
  160. File pageDir = getPageDir(page);
  161. if (pageDir == null)
  162. return paths;
  163. //go down one level
  164. File files[] = pageDir.listFiles();
  165. //open up the xml files
  166. for (File file : files) {
  167. //check for existence and non-directoryness
  168. if (!file.exists() || file.isDirectory())
  169. continue;
  170. //slurp string
  171. String fileContents = read(file);
  172. // log.debug("fileContents = " + fileContents);
  173. if (isAttachment(fileContents)) {
  174. String path = getPath(fileContents);
  175. String root = this.getAttachmentDirectory();
  176. String basePath = encodeChars(root + path + JOTSPOT_IMAGE_DIR);
  177. String tmpPath = basePath + JOTSPOT_IMAGE_DATA;
  178. path = basePath + getFilename(path);
  179. File tmpFile = new File(tmpPath);
  180. File toFile = new File(path);
  181. if (!tmpFile.renameTo(toFile)) { //this happens when there's already a file there
  182. log.debug("Can't rename " + tmpPath + " to " + path);
  183. }
  184. log.debug("attachment path = " + path);
  185. if (path == null)
  186. continue;
  187. paths.add(path);
  188. }
  189. }
  190. return paths;
  191. }
  192. /**
  193. * @param path
  194. * @return filename for a given filepath.
  195. * <br/>Example:
  196. * <br/>path = /Dirpath/myfile.txt
  197. * <br/>return = myfile.txt
  198. */
  199. private String getFilename(String path) {
  200. File file = new File(path);
  201. return file.getName();
  202. }
  203. Pattern attachmentPath = Pattern.compile("<node[^>]+?path=\"([^\"]*)\"");
  204. /**
  205. * Uses the given file contents to determine the path to the attachment
  206. * @param fileContents
  207. * @return
  208. */
  209. private String getPath(String fileContents) {
  210. log.debug("non image attachment!!!");
  211. Matcher pathFinder = attachmentPath.matcher(fileContents);
  212. if (pathFinder.find()) {
  213. String path = pathFinder.group(1);
  214. path = path.replaceAll(" ", "+");
  215. log.debug("path = " + path);
  216. return path;
  217. }
  218. return null;
  219. }
  220. Pattern attachmentClue = Pattern.compile("nodeClass=\"attachment\"");
  221. /**
  222. * Uses the given file contents to determine if that file represents
  223. * an attachment
  224. * @param fileContents
  225. * @return true if fileContents represents an attachment
  226. */
  227. private boolean isAttachment(String fileContents) {
  228. Matcher attachmentClueFinder = attachmentClue.matcher(fileContents);
  229. return attachmentClueFinder.find();
  230. }
  231. /**
  232. * reads the contents of the file
  233. * @param file file is assumed to be an existing non-directory file
  234. * @return the contents of the file as a string or null if file could
  235. * not be found
  236. */
  237. private String read(File file) {
  238. String string = "";
  239. try {
  240. Scanner in = new Scanner(file);
  241. while (in.hasNext()) {
  242. string += in.next() + " ";
  243. }
  244. } catch (FileNotFoundException e) {
  245. log.debug("Problem opening file: " + file.getName());
  246. e.printStackTrace();
  247. return null;
  248. }
  249. return string;
  250. }
  251. Pattern extension = Pattern.compile("(.*)\\.\\w+$");
  252. /**
  253. * figures out the associated directory for the given page
  254. * @param page
  255. * @return page directory or null if none exists
  256. * <br/>Example:
  257. * <br/>If the page exists at: /SomeDirectory/My+Page.xml
  258. * <br/>returns /SomeDirectory/My+Page if that represents an existing directory
  259. */
  260. private File getPageDir(Page page) {
  261. String similarPath = page.getFile().getPath();
  262. log.debug("similar = " + similarPath);
  263. Matcher extFinder = extension.matcher(similarPath);
  264. if (extFinder.lookingAt()) {
  265. String dirPath = extFinder.group(1);
  266. log.debug("dirPath = " + dirPath);
  267. File file = new File(dirPath);
  268. if (file.exists() && file.isDirectory()) {
  269. return file;
  270. }
  271. }
  272. return null;
  273. }
  274. }