PageRenderTime 122ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/java/org/olat/search/service/document/file/PowerPointOOXMLDocument.java

https://bitbucket.org/mg/olat
Java | 142 lines | 98 code | 16 blank | 28 comment | 15 complexity | 2571ccf2f7e2fae1fccea79a449e6010 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-3.0, 0BSD, MPL-2.0-no-copyleft-exception, AGPL-3.0, Apache-2.0
  1. /**
  2. * OLAT - Online Learning and Training<br>
  3. * http://www.olat.org
  4. * <p>
  5. * Licensed under the Apache License, Version 2.0 (the "License"); <br>
  6. * you may not use this file except in compliance with the License.<br>
  7. * You may obtain a copy of the License at
  8. * <p>
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. * <p>
  11. * Unless required by applicable law or agreed to in writing,<br>
  12. * software distributed under the License is distributed on an "AS IS" BASIS, <br>
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
  14. * See the License for the specific language governing permissions and <br>
  15. * limitations under the License.
  16. * <p>
  17. * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
  18. * University of Zurich, Switzerland.
  19. * <p>
  20. */
  21. package org.olat.search.service.document.file;
  22. import java.io.BufferedInputStream;
  23. import java.io.IOException;
  24. import org.apache.lucene.document.Document;
  25. import org.apache.poi.POIXMLDocument;
  26. import org.apache.poi.POIXMLTextExtractor;
  27. import org.apache.poi.extractor.ExtractorFactory;
  28. import org.apache.poi.xslf.XSLFSlideShow;
  29. import org.apache.poi.xslf.usermodel.XMLSlideShow;
  30. import org.apache.poi.xslf.usermodel.XSLFSlide;
  31. import org.apache.xmlbeans.XmlException;
  32. import org.olat.core.logging.OLog;
  33. import org.olat.core.logging.Tracing;
  34. import org.olat.core.util.vfs.VFSLeaf;
  35. import org.olat.search.service.SearchResourceContext;
  36. import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
  37. import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
  38. import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
  39. import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
  40. import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
  41. import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
  42. import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
  43. import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
  44. import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
  45. import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry;
  46. /**
  47. * Description:<br>
  48. * Parse the PowerPoint XML document (.pptx) with Apache POI
  49. * <P>
  50. * Initial Date: 14 dec. 2009 <br>
  51. *
  52. * @author srosse, stephane.rosse@frentix.com
  53. */
  54. public class PowerPointOOXMLDocument extends FileDocument {
  55. private static final OLog log = Tracing.createLoggerFor(PowerPointOOXMLDocument.class);
  56. public final static String FILE_TYPE = "type.file.ppt";
  57. public PowerPointOOXMLDocument() {
  58. super();
  59. }
  60. public static Document createDocument(final SearchResourceContext leafResourceContext, final VFSLeaf leaf) throws IOException, DocumentException,
  61. DocumentAccessException {
  62. final PowerPointOOXMLDocument powerPointDocument = new PowerPointOOXMLDocument();
  63. powerPointDocument.init(leafResourceContext, leaf);
  64. powerPointDocument.setFileType(FILE_TYPE);
  65. powerPointDocument.setCssIcon("b_filetype_ppt");
  66. if (log.isDebug()) {
  67. log.debug(powerPointDocument.toString());
  68. }
  69. return powerPointDocument.getLuceneDocument();
  70. }
  71. @Override
  72. public String readContent(final VFSLeaf leaf) throws IOException, DocumentException {
  73. BufferedInputStream bis = null;
  74. final StringBuilder buffy = new StringBuilder();
  75. try {
  76. bis = new BufferedInputStream(leaf.getInputStream());
  77. final POIXMLTextExtractor extractor = (POIXMLTextExtractor) ExtractorFactory.createExtractor(bis);
  78. final POIXMLDocument document = extractor.getDocument();
  79. if (document instanceof XSLFSlideShow) {
  80. final XSLFSlideShow slideShow = (XSLFSlideShow) document;
  81. final XMLSlideShow xmlSlideShow = new XMLSlideShow(slideShow);
  82. extractContent(buffy, xmlSlideShow);
  83. }
  84. return buffy.toString();
  85. } catch (final Exception e) {
  86. throw new DocumentException(e.getMessage());
  87. } finally {
  88. if (bis != null) {
  89. bis.close();
  90. }
  91. }
  92. }
  93. private void extractContent(final StringBuilder buffy, final XMLSlideShow xmlSlideShow) throws IOException, XmlException {
  94. final XSLFSlide[] slides = xmlSlideShow.getSlides();
  95. for (final XSLFSlide slide : slides) {
  96. final CTSlide rawSlide = slide._getCTSlide();
  97. final CTSlideIdListEntry slideId = slide._getCTSlideId();
  98. final CTNotesSlide notes = xmlSlideShow._getXSLFSlideShow().getNotes(slideId);
  99. final CTCommentList comments = xmlSlideShow._getXSLFSlideShow().getSlideComments(slideId);
  100. extractShapeContent(buffy, rawSlide.getCSld().getSpTree());
  101. if (comments != null) {
  102. for (final CTComment comment : comments.getCmArray()) {
  103. buffy.append(comment.getText()).append(' ');
  104. }
  105. }
  106. if (notes != null) {
  107. extractShapeContent(buffy, notes.getCSld().getSpTree());
  108. }
  109. }
  110. }
  111. private void extractShapeContent(final StringBuilder buffy, final CTGroupShape gs) {
  112. final CTShape[] shapes = gs.getSpArray();
  113. for (final CTShape shape : shapes) {
  114. final CTTextBody textBody = shape.getTxBody();
  115. if (textBody != null) {
  116. final CTTextParagraph[] paras = textBody.getPArray();
  117. for (final CTTextParagraph textParagraph : paras) {
  118. final CTRegularTextRun[] textRuns = textParagraph.getRArray();
  119. for (final CTRegularTextRun textRun : textRuns) {
  120. buffy.append(textRun.getT()).append(' ');
  121. }
  122. }
  123. }
  124. }
  125. }
  126. }