/src/main/java/org/olat/search/service/document/file/PPT2Text.java
Java | 89 lines | 53 code | 11 blank | 25 comment | 5 complexity | 2d2c96a01ac84d4a82d4b810991dce3f MD5 | raw file
Possible License(s): LGPL-2.1, GPL-3.0, 0BSD, MPL-2.0-no-copyleft-exception, AGPL-3.0, Apache-2.0
- /**
- * OLAT - Online Learning and Training<br>
- * http://www.olat.org
- * <p>
- * Licensed under the Apache License, Version 2.0 (the "License"); <br>
- * you may not use this file except in compliance with the License.<br>
- * You may obtain a copy of the License at
- * <p>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing,<br>
- * software distributed under the License is distributed on an "AS IS" BASIS, <br>
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
- * See the License for the specific language governing permissions and <br>
- * limitations under the License.
- * <p>
- * Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
- * University of Zurich, Switzerland.
- * <p>
- */
- package org.olat.search.service.document.file;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import org.apache.poi.poifs.eventfilesystem.POIFSReader;
- import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
- import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
- import org.apache.poi.poifs.filesystem.DocumentInputStream;
- import org.apache.poi.util.LittleEndian;
- import org.olat.core.logging.OLog;
- import org.olat.core.logging.Tracing;
- /**
- * @author Christian Guretzki
- */
- public class PPT2Text {
- public static void extractText(final InputStream inStream, final OutputStream stream) throws IOException {
- final POIFSReader r = new POIFSReader();
- /* Register a listener for *all* documents. */
- r.registerListener(new MyPOIFSReaderListener(stream));
- r.read(inStream);
- }
- static class MyPOIFSReaderListener implements POIFSReaderListener {
- private static final OLog log = Tracing.createLoggerFor(PPT2Text.class);
- private final OutputStream oStream;
- public MyPOIFSReaderListener(final OutputStream oStream) {
- this.oStream = oStream;
- }
- @Override
- public void processPOIFSReaderEvent(final POIFSReaderEvent event) {
- int errorCounter = 0;
- try {
- DocumentInputStream dis = null;
- dis = event.getStream();
- final byte btoWrite[] = new byte[dis.available()];
- dis.read(btoWrite, 0, dis.available());
- for (int i = 0; i < btoWrite.length - 20; i++) {
- final long type = LittleEndian.getUShort(btoWrite, i + 2);
- final long size = LittleEndian.getUInt(btoWrite, i + 4);
- if (type == 4008) {
- try {
- oStream.write(btoWrite, i + 4 + 1, (int) size + 3);
- } catch (final IndexOutOfBoundsException ex) {
- errorCounter++;
- }
- }
- }
- } catch (final Exception ex) {
- // FIXME:chg: Remove general Exception later, for now make it run
- log.warn("Can not read PPT content.", ex);
- }
- if (errorCounter > 0) {
- if (log.isDebug()) {
- log.debug("Could not parse ppt properly. There were " + errorCounter + " IndexOutOfBoundsException");
- }
- }
- }
- }
- }