PageRenderTime 73ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/shoudaw/msword/MsWordDoc.java

https://bitbucket.org/johnnywsd/java-extractemailfromdocdocxpdftxt
Java | 29 lines | 18 code | 9 blank | 2 comment | 0 complexity | 7dcf9a408bc424cfbaf5f91e7743265a MD5 | raw file
  1. package shoudaw.msword;
  2. import java.io.FileInputStream;
  3. import org.apache.poi.hwpf.HWPFDocument;
  4. import org.apache.poi.hwpf.extractor.WordExtractor;
  5. public class MsWordDoc {
  6. public static String getString(String filePath) throws Exception{
  7. try {
  8. // System.out.println("Proceeding:"+filePath);
  9. HWPFDocument document=new HWPFDocument(new FileInputStream(filePath));
  10. WordExtractor extractor = new WordExtractor(document);
  11. String text = extractor.getText();
  12. return text;
  13. } catch (Exception e) {
  14. // TODO Auto-generated catch block
  15. e.printStackTrace();
  16. String errStr = String.format("Error: %s , Please extract the email manually", filePath);
  17. throw new Exception(errStr);
  18. }
  19. }
  20. }