PageRenderTime 63ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/filesearch/SearchLocalFile/src/com/searchlocal/filereader/PptReader.java

http://filesearch.googlecode.com/
Java | 137 lines | 122 code | 10 blank | 5 comment | 12 complexity | e80907b3feb10a010e2946da79d05656 MD5 | raw file
  1. package com.searchlocal.filereader;
  2. import java.io.FileInputStream;
  3. import java.io.FileNotFoundException;
  4. import java.io.IOException;
  5. import java.util.ArrayList;
  6. import java.util.List;
  7. import org.apache.poi.hslf.HSLFSlideShow;
  8. import org.apache.poi.hslf.model.Slide;
  9. import org.apache.poi.hslf.model.TextRun;
  10. import org.apache.poi.hslf.usermodel.SlideShow;
  11. import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  12. import org.apache.poi.xslf.XSLFSlideShow;
  13. import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
  14. import org.apache.poi.xslf.usermodel.XMLSlideShow;
  15. import org.apache.poi.xslf.usermodel.XSLFSlide;
  16. import org.apache.xmlbeans.XmlException;
  17. import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
  18. import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
  19. import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
  20. import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
  21. import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
  22. import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
  23. import com.searchlocal.bean.PptFileBean;
  24. import com.searchlocal.exception.LogicException;
  25. import com.searchlocal.util.CLogger;
  26. import com.searchlocal.util.StringUtils;
  27. public class PptReader {
  28. private static CLogger logger = new CLogger(PptReader.class);
  29. public PptReader() {
  30. super();
  31. }
  32. public List<PptFileBean> getPptFile(PptFileBean pptBean) throws LogicException {
  33. List<PptFileBean> pptEntityList = new ArrayList<PptFileBean>();
  34. FileInputStream is = null;
  35. String pptFilePath = pptBean.getPath();
  36. try {
  37. is = new FileInputStream(pptFilePath);
  38. SlideShow ss;
  39. Slide[] slides = null;
  40. if (StringUtils.is2007Doc(pptFilePath)) {
  41. // ???????xml??
  42. XMLSlideShow xmlslideshow = null;
  43. try {
  44. xmlslideshow = new XMLSlideShow(new XSLFSlideShow(pptFilePath));
  45. // ??xml?????????ppt??
  46. XSLFPowerPointExtractor ppt = new XSLFPowerPointExtractor(xmlslideshow);
  47. return getBeanList(pptBean, xmlslideshow.getSlides());
  48. } catch (XmlException e) {
  49. // TODO ??????(??I/O?)
  50. e.printStackTrace();
  51. } catch (OpenXML4JException e) {
  52. // TODO ??????(??I/O?)
  53. e.printStackTrace();
  54. }
  55. } else {
  56. ss = new SlideShow(new HSLFSlideShow(is));
  57. slides = ss.getSlides();
  58. }
  59. PptFileBean entity = null;
  60. StringBuffer content = null;
  61. for (int i = 0; i < slides.length; i++) {
  62. entity = new PptFileBean();
  63. content = new StringBuffer();
  64. content.append(slides[i].getTitle());
  65. TextRun[] t = slides[i].getTextRuns();
  66. for (int j = 0; j < t.length; j++) {
  67. content.append(t[j].getText());
  68. }
  69. entity = new PptFileBean();
  70. entity.setContent(content.toString());
  71. entity.setPage(i);
  72. entity.setLastmodify(pptBean.getLastmodify());
  73. entity.setPath(pptBean.getPath());
  74. entity.setFilename(pptBean.getFilename());
  75. pptEntityList.add(entity);
  76. }
  77. } catch (FileNotFoundException e) {
  78. logger.error("LG_E001", pptFilePath);
  79. throw new LogicException("LG_E001", e);
  80. } catch (IOException e) {
  81. logger.error("LG_E003", pptFilePath);
  82. throw new LogicException("LG_E003", e);
  83. } finally {
  84. try {
  85. if (is != null) {
  86. is.close();
  87. }
  88. } catch (IOException e) {
  89. // TODO Auto-generated catch block
  90. e.printStackTrace();
  91. }
  92. }
  93. return pptEntityList;
  94. }
  95. public List getBeanList(PptFileBean pptBean, XSLFSlide[] slides) {
  96. PptFileBean entity = null;
  97. StringBuffer content = null;
  98. List beanList = new ArrayList();
  99. for (int i = 0; i < slides.length; i++) {
  100. entity = new PptFileBean();
  101. content = new StringBuffer();
  102. org.apache.poi.xslf.usermodel.XSLFSlide xslfSlide = slides[i];
  103. CTSlide rawSlide = xslfSlide._getCTSlide();
  104. CTGroupShape gs = rawSlide.getCSld().getSpTree();
  105. CTShape[] shapes = gs.getSpArray();
  106. for (CTShape shape : shapes) {
  107. CTTextBody tb = shape.getTxBody();
  108. if (null == tb)
  109. continue;
  110. CTTextParagraph[] paras = tb.getPArray();
  111. for (CTTextParagraph textParagraph : paras) {
  112. CTRegularTextRun[] textRuns = textParagraph.getRArray();
  113. for (CTRegularTextRun textRun : textRuns) {
  114. content.append(textRun.getT());
  115. }
  116. }
  117. }
  118. entity.setLastmodify(pptBean.getLastmodify());
  119. entity.setPath(pptBean.getPath());
  120. entity.setFilename(pptBean.getFilename());
  121. entity.setContent(content.toString());
  122. entity.setPage(i);
  123. beanList.add(entity);
  124. }
  125. return beanList;
  126. }
  127. }