/src/test/java/com/eastrobot/doc/util/PdfToHtmlTests.java

https://github.com/ekoz/kbase-doc · Java · 36 lines · 22 code · 5 blank · 9 comment · 0 complexity · 552c7d20ca78589ef5d83a1fff0c6e15 MD5 · raw file

  1. /*
  2. * Power by www.xiaoi.com
  3. */
  4. package com.eastrobot.doc.util;
  5. import java.io.File;
  6. import java.io.FileOutputStream;
  7. import java.io.IOException;
  8. import java.io.OutputStreamWriter;
  9. import javax.xml.parsers.ParserConfigurationException;
  10. import org.apache.pdfbox.pdmodel.PDDocument;
  11. import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
  12. import org.fit.pdfdom.PDFDomTree;
  13. import org.junit.Test;
  14. import org.w3c.dom.Document;
  15. /**
  16. * @author <a href="mailto:eko.z@outlook.com">eko.zhan</a>
  17. * @date 2018年7月10日 下午5:00:24
  18. * @version 1.0
  19. */
  20. public class PdfToHtmlTests {
  21. @Test
  22. public void pdf2html() throws InvalidPasswordException, IOException, ParserConfigurationException{
  23. String srcFilePath = "E:\\converter-html\\sgcc\\京电发展〔2019〕82号(盖章).ceb";
  24. String destFilePath = srcFilePath + ".html";
  25. PDDocument pdf = PDDocument.load(new File(srcFilePath));
  26. PDFDomTree parser = new PDFDomTree();
  27. // Document dom = parser.createDOM(pdf);
  28. File htmlFile = new File(destFilePath);
  29. parser.writeText(pdf, new OutputStreamWriter(new FileOutputStream(htmlFile)));
  30. }
  31. }