PageRenderTime 26ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/src/com/atlassian/uwc/exporters/SwikiFile.java

https://bitbucket.org/atlassianlabs/universal-wiki-connector
Java | 491 lines | 266 code | 60 blank | 165 comment | 25 complexity | 901f969917e8cfaec55e5d327feda621 MD5 | raw file
  1. /*
  2. * SwikiFile.java
  3. *
  4. * Created on March 5, 2007, 3:07 PM
  5. *
  6. * Represents an XML file holding one Swiki page
  7. */
  8. package com.atlassian.uwc.exporters;
  9. import java.io.File;
  10. import java.io.FileWriter;
  11. import java.io.InputStream;
  12. import java.io.ByteArrayInputStream;
  13. import java.io.FileInputStream;
  14. import java.io.FileOutputStream;
  15. import java.io.IOException;
  16. import java.nio.channels.FileChannel;
  17. import java.util.ArrayList;
  18. import java.util.HashMap;
  19. import java.util.regex.Matcher;
  20. import java.util.regex.Pattern;
  21. import java.lang.Character;
  22. import org.apache.log4j.Logger;
  23. import org.jdom.Document;
  24. import org.jdom.Element;
  25. import org.jdom.JDOMException;
  26. import org.jdom.input.SAXBuilder;
  27. /**
  28. *
  29. * @author sitongia
  30. */
  31. public class SwikiFile {
  32. private static final String FILE_SEP = System.getProperty("file.separator");
  33. private Logger log = Logger.getLogger(this.getClass());
  34. private File input_directory;
  35. private File output_directory;
  36. private File attachmentsOutDir;
  37. private File attachmentsInDir;
  38. private String name;
  39. private String text;
  40. private String number;
  41. private Element root;
  42. private StringBuffer buffer;
  43. private static HashMap<String, String> linksFound = new HashMap<String, String>();
  44. /**
  45. * Creates a new instance of SwikiFile
  46. */
  47. public SwikiFile(File input_directory, File attachmentsInDir, File output_directory,
  48. File attachmentsOutDir, String input_file) {
  49. String[] fileSplit = input_file.split("\\.");
  50. //linksFound.put(fileSplit[0] , input_file);
  51. log.info("Processing "+input_file);
  52. this.input_directory = input_directory;
  53. this.output_directory = output_directory;
  54. this.attachmentsOutDir = attachmentsOutDir;
  55. this.attachmentsInDir = attachmentsInDir;
  56. setNumber(input_file);
  57. File inputFile = new File(input_directory, input_file);
  58. if (!inputFile.exists()) {
  59. log.error("Input directory or file is in error.");
  60. return;
  61. }
  62. // Use JDOM to handle the XML
  63. Document Doc=this.readDocument(inputFile);
  64. // The name of the page is in the <name> element
  65. root = Doc.getRootElement();
  66. setName();
  67. setText();
  68. linksFound.put(fileSplit[0] , name);
  69. buffer = new StringBuffer();
  70. }
  71. /**
  72. * Get the numerical value of the original filename.
  73. */
  74. public String getNumber() {
  75. return number;
  76. }
  77. /**
  78. * Set the numerical value of the original filename.
  79. */
  80. private void setNumber(String name) {
  81. String[] parts = name.split("\\.");
  82. number = parts[0];
  83. }
  84. /**
  85. * Get the string value of the new filename.
  86. */
  87. public String getName() {
  88. return name;
  89. }
  90. /**
  91. * Set the string value of the new filename from
  92. * the XML <name/> element.
  93. */
  94. private void setName() {
  95. // The name content of the page is in the <name> element
  96. Element nameElement = root.getChild("name");
  97. name = nameElement.getText();
  98. // remove any swiki referance in the name
  99. name = name.replaceAll("[S|s][W|w][I|i][K|k][I|i]", "");
  100. name = name.replaceAll("/", "-");
  101. name = name.trim();
  102. }
  103. /**
  104. * Get the text data.
  105. */
  106. public String getText() {
  107. return text;
  108. }
  109. /**
  110. * Set the string value of the XML <text/> element.
  111. */
  112. private void setText() {
  113. // The text body content of the page is in the <text> element
  114. Element textElement = root.getChild("text");
  115. text = textElement.getText();
  116. }
  117. /**
  118. * Convert the links in the file to references to the filename.
  119. *
  120. * Parse the page, looking for page links, which are of the form
  121. * *#* where # is the name of the page linked to, in the same directory
  122. * and is #.xml
  123. */
  124. public void convert() {
  125. // TODO: pathological cases to consider:
  126. // TODO: case of '**'? Two asterisks that aren't a link
  127. // TODO: case of '*' at end of text!
  128. // TODO: case of '*a' at end of text!
  129. // TODO: case of '*666' at end of text!
  130. /*
  131. StringBuffer link = new StringBuffer();
  132. Character letter, nextletter;
  133. boolean isDescLink=false;
  134. for (int i = 0; i < text.length(); i++) {
  135. letter = Character.valueOf(text.charAt(i));
  136. if (letter.compareTo('*') == 0) {
  137. // Could be the start of a link
  138. link.setLength(0);
  139. for (int j = 1; j < 100; j++) {
  140. if (i+j == text.length()) {
  141. // Asterisk happens to appear near end of file
  142. buffer.append(letter);
  143. break;
  144. }
  145. nextletter = Character.valueOf(text.charAt(i+j));
  146. if (Character.isDigit(nextletter)) {
  147. // Save letter in buffer for the numerical value of link
  148. link.append(nextletter);
  149. } else if (nextletter.compareTo('*') == 0 && j > 1) {
  150. // End of link
  151. //System.out.println("Found link "+link);
  152. // Instantiate a page representing this XML file in the swiki
  153. if(!linksFound.containsKey(link.toString())){
  154. SwikiFile linked_page = new SwikiFile(input_directory, attachmentsInDir,
  155. output_directory, attachmentsOutDir, link+".xml");
  156. // Convert it
  157. linked_page.convert();
  158. linked_page.save();
  159. // Write the link to this newly converted page
  160. String newname = linked_page.getName();
  161. //TODO: remove spaces from name
  162. buffer.append('[');
  163. buffer.append(newname);
  164. buffer.append(']');
  165. i+=j;
  166. break;
  167. }
  168. else
  169. {
  170. buffer.append('[');
  171. buffer.append(linksFound.get(link.toString()));
  172. buffer.append(']');
  173. i+=j;
  174. break;
  175. }
  176. } else {
  177. // Not a link; write the letter out
  178. buffer.append(letter);
  179. break;
  180. }
  181. }
  182. } else {
  183. // Not a link; write the letter out
  184. buffer.append(letter);
  185. }
  186. }
  187. */
  188. processFalseLinks();
  189. processDirectLink();
  190. processDescriptionLink();
  191. }
  192. /************************************************************************
  193. * To process link like *1*
  194. */
  195. public void processDirectLink()
  196. {
  197. Pattern pattern = null;
  198. Matcher matcher = null;
  199. StringBuffer sb = new StringBuffer();
  200. boolean found=false;
  201. pattern = Pattern.compile("\\*([\\d]+)\\*");
  202. matcher = pattern.matcher(buffer.toString());
  203. while(matcher.find())
  204. {
  205. found=true;
  206. String link = matcher.group();
  207. String converted = link.replaceFirst("\\*", "");
  208. converted = converted.replaceAll("\\*$", "");
  209. if(!linksFound.containsKey(converted.toString())){
  210. SwikiFile linked_page = new SwikiFile(input_directory, attachmentsInDir,
  211. output_directory, attachmentsOutDir, converted+".xml");
  212. // Convert it
  213. linked_page.convert();
  214. linked_page.save();
  215. // Write the link to this newly converted page
  216. String newname = linked_page.getName();
  217. matcher.appendReplacement(sb, '[' + newname + ']');
  218. }
  219. else
  220. {
  221. matcher.appendReplacement(sb, '[' + linksFound.get(converted.toString()) + ']');
  222. }
  223. }
  224. matcher.appendTail(sb);
  225. if(found)
  226. buffer = sb;
  227. }
  228. /***************************************************************
  229. * to process the link like [ abc>1]
  230. */
  231. public void processDescriptionLink()
  232. {
  233. Pattern pattern = null;
  234. Matcher matcher = null;
  235. StringBuffer sb = new StringBuffer();
  236. boolean found=false;
  237. pattern = Pattern.compile("\\*.*\\>\\d*\\*");
  238. matcher = pattern.matcher(buffer.toString());
  239. while(matcher.find())
  240. {
  241. found=true;
  242. String buffer = matcher.group();
  243. int index=buffer.length();
  244. int length=buffer.length();
  245. while (index > 0)
  246. {
  247. if (buffer.charAt(index - 1) == '>')
  248. break;
  249. index--;
  250. }
  251. String ss1=buffer.substring(0, index);
  252. String ss2=buffer.substring(index, length - 1);
  253. if(!linksFound.containsKey(ss2.toString())){
  254. SwikiFile linked_page = new SwikiFile(input_directory, attachmentsInDir,
  255. output_directory, attachmentsOutDir, ss2+".xml");
  256. // Convert it
  257. linked_page.convert();
  258. linked_page.save();
  259. // Write the link to this newly converted page
  260. String newname = linked_page.getName();
  261. matcher.appendReplacement(sb, '*' + ss1 + newname + '*');
  262. }
  263. else
  264. {
  265. matcher.appendReplacement(sb, '*' + ss1 + linksFound.get(ss2.toString()) + '*');
  266. }
  267. }
  268. matcher.appendTail(sb);
  269. if(found)
  270. buffer = sb;
  271. }
  272. /**
  273. * Write the file to the output name given by the value of
  274. * the XML <name/> element.
  275. */
  276. public void save() {
  277. // Output file name is content of <name>
  278. File output_file = new File(output_directory, getName()+".txt");
  279. try {
  280. FileWriter pageFW = new FileWriter(output_file);
  281. pageFW.write(buffer.toString());
  282. pageFW.close();
  283. } catch (IOException ex) {
  284. ex.printStackTrace();
  285. }
  286. }
  287. /**
  288. * Swiki Attachments are stored in uploads/#. The # is related to the #.xml page.
  289. * e.g. Attachments for page 9.xml will be in uploads/9. These will be copied into
  290. * the exported/attachments directory
  291. * @author Kelly Meese
  292. *
  293. */
  294. public void copyAttachments()
  295. {
  296. log.info("Starting Copying attachments.");
  297. File uploads = attachmentsInDir;
  298. if(uploads.exists()){
  299. for(File srcFileName : uploads.listFiles()) {
  300. //System.out.println("Src file: " + srcFileName.getName());
  301. if(srcFileName.isFile()) {
  302. copyToDir(srcFileName, new File(attachmentsOutDir.getAbsolutePath() + FILE_SEP +
  303. srcFileName.getName()));
  304. }else if(srcFileName.isDirectory()){
  305. File uploadDir = srcFileName;
  306. for(File fileName : uploadDir.listFiles()){
  307. copyToDir(fileName, new File(attachmentsOutDir.getAbsolutePath() + FILE_SEP +
  308. fileName.getName()));
  309. }
  310. }
  311. }
  312. }
  313. log.info("Copy attachments complete.");
  314. }
  315. /**
  316. * Copies attachments from the source to dest. It uses the attachments in and out dir
  317. * for finding attachments and copying them for the attachments parser.
  318. * @param srcFileName
  319. * @param dstFileName
  320. */
  321. private void copyToDir(File srcFileName, File dstFileName){
  322. try {
  323. // Create channel on the source
  324. FileChannel srcChannel = new FileInputStream(srcFileName).getChannel();
  325. // Create channel on the destination
  326. FileChannel dstChannel = new FileOutputStream(dstFileName).getChannel();
  327. // Copy file contents from source to destination
  328. dstChannel.transferFrom(srcChannel, 0, srcChannel.size());
  329. // Close the channels
  330. srcChannel.close();
  331. dstChannel.close();
  332. } catch (IOException e) {
  333. log.error("Unable to copy file: " + e.getMessage());
  334. }
  335. }
  336. /**
  337. * Convert the remaining pages that do not have pages linked to them.
  338. *
  339. */
  340. public void convertRemainingPages(){
  341. log.info("Converting remaining pages");
  342. for(String fileName : input_directory.list()){
  343. if(fileName.endsWith(".xml")){
  344. if(!linksFound.containsValue(fileName)){
  345. SwikiFile linked_page = new SwikiFile(input_directory, attachmentsInDir,
  346. output_directory, attachmentsOutDir, fileName);
  347. linked_page.convert();
  348. linked_page.save();
  349. }
  350. }
  351. }
  352. log.info("Conversion of remaining pages complete.");
  353. }
  354. /**
  355. * Once the exporter has finished clear out the links.
  356. */
  357. public void clearLinks()
  358. {
  359. linksFound.clear();
  360. }
  361. /*****************************************************
  362. * To read the xml file and build an XML Document
  363. * @param fileName
  364. * @return
  365. */
  366. public Document readDocument(File file) {
  367. SAXBuilder builder = new SAXBuilder();
  368. Document doc = null;
  369. InputStream in = null;
  370. try {
  371. in = new FileInputStream(file);
  372. byte[] buf1 = new byte[1024];
  373. byte[] buf2 = new byte[0], buf3 = null;
  374. int size = 0;
  375. int len;
  376. while ((len = in.read(buf1)) > 0) {
  377. buf3 = new byte[size + len];
  378. System.arraycopy(buf2, 0, buf3, 0, size);
  379. System.arraycopy(buf1, 0, buf3, size, len);
  380. size += len;
  381. buf2 = new byte[size];
  382. System.arraycopy(buf3, 0, buf2, 0, size);
  383. }
  384. in.close();
  385. //replace the bad bytes with spaces
  386. for (int i = 0; i < size; i++) {
  387. if (buf3[i] == 6 || buf3[i] == 16 || buf3[i] > 127 || buf3[i] < 0)
  388. buf3[i] = 32;
  389. }
  390. ByteArrayInputStream bis = new ByteArrayInputStream(buf3);
  391. doc = builder.build(bis);
  392. } catch (IOException ex) {
  393. ex.printStackTrace();
  394. } catch (JDOMException ex) {
  395. ex.printStackTrace();
  396. }
  397. return doc;
  398. }
  399. /*************************************************************************
  400. * to convert the false links ([something]) from swiki to italian style
  401. */
  402. public void processFalseLinks()
  403. {
  404. Pattern pattern = null;
  405. Matcher matcher = null;
  406. StringBuffer sb = new StringBuffer();
  407. Boolean found = false;
  408. pattern = Pattern.compile("\\[([^\\]]*)\\]");
  409. matcher = pattern.matcher(text);
  410. while(matcher.find())
  411. {
  412. found = true;
  413. String link = matcher.group();
  414. String converted = link.replaceFirst("\\[", "\\\\\\\\[");
  415. converted = converted.replaceAll("\\]", "\\\\\\\\]");
  416. matcher.appendReplacement(sb, converted);
  417. }
  418. matcher.appendTail(sb);
  419. if(found)
  420. buffer = sb;
  421. else
  422. buffer = new StringBuffer(text);
  423. }
  424. }