PageRenderTime 48ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/java/org/olat/search/service/document/file/PPT2Text.java

https://bitbucket.org/mg/olat
Java | 89 lines | 53 code | 11 blank | 25 comment | 5 complexity | 2d2c96a01ac84d4a82d4b810991dce3f MD5 | raw file
Possible License(s): LGPL-2.1, GPL-3.0, 0BSD, MPL-2.0-no-copyleft-exception, AGPL-3.0, Apache-2.0
  1. /**
  2. * OLAT - Online Learning and Training<br>
  3. * http://www.olat.org
  4. * <p>
  5. * Licensed under the Apache License, Version 2.0 (the "License"); <br>
  6. * you may not use this file except in compliance with the License.<br>
  7. * You may obtain a copy of the License at
  8. * <p>
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. * <p>
  11. * Unless required by applicable law or agreed to in writing,<br>
  12. * software distributed under the License is distributed on an "AS IS" BASIS, <br>
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
  14. * See the License for the specific language governing permissions and <br>
  15. * limitations under the License.
  16. * <p>
  17. * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
  18. * University of Zurich, Switzerland.
  19. * <p>
  20. */
  21. package org.olat.search.service.document.file;
  22. import java.io.IOException;
  23. import java.io.InputStream;
  24. import java.io.OutputStream;
  25. import org.apache.poi.poifs.eventfilesystem.POIFSReader;
  26. import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  27. import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
  28. import org.apache.poi.poifs.filesystem.DocumentInputStream;
  29. import org.apache.poi.util.LittleEndian;
  30. import org.olat.core.logging.OLog;
  31. import org.olat.core.logging.Tracing;
  32. /**
  33. * @author Christian Guretzki
  34. */
  35. public class PPT2Text {
  36. public static void extractText(final InputStream inStream, final OutputStream stream) throws IOException {
  37. final POIFSReader r = new POIFSReader();
  38. /* Register a listener for *all* documents. */
  39. r.registerListener(new MyPOIFSReaderListener(stream));
  40. r.read(inStream);
  41. }
  42. static class MyPOIFSReaderListener implements POIFSReaderListener {
  43. private static final OLog log = Tracing.createLoggerFor(PPT2Text.class);
  44. private final OutputStream oStream;
  45. public MyPOIFSReaderListener(final OutputStream oStream) {
  46. this.oStream = oStream;
  47. }
  48. @Override
  49. public void processPOIFSReaderEvent(final POIFSReaderEvent event) {
  50. int errorCounter = 0;
  51. try {
  52. DocumentInputStream dis = null;
  53. dis = event.getStream();
  54. final byte btoWrite[] = new byte[dis.available()];
  55. dis.read(btoWrite, 0, dis.available());
  56. for (int i = 0; i < btoWrite.length - 20; i++) {
  57. final long type = LittleEndian.getUShort(btoWrite, i + 2);
  58. final long size = LittleEndian.getUInt(btoWrite, i + 4);
  59. if (type == 4008) {
  60. try {
  61. oStream.write(btoWrite, i + 4 + 1, (int) size + 3);
  62. } catch (final IndexOutOfBoundsException ex) {
  63. errorCounter++;
  64. }
  65. }
  66. }
  67. } catch (final Exception ex) {
  68. // FIXME:chg: Remove general Exception later, for now make it run
  69. log.warn("Can not read PPT content.", ex);
  70. }
  71. if (errorCounter > 0) {
  72. if (log.isDebug()) {
  73. log.debug("Could not parse ppt properly. There were " + errorCounter + " IndexOutOfBoundsException");
  74. }
  75. }
  76. }
  77. }
  78. }