PageRenderTime 4998ms CodeModel.GetById 967ms RepoModel.GetById 6ms app.codeStats 0ms

/ontopia-classify/src/main/java/net/ontopia/topicmaps/classify/OOXMLPowerpointFormatModule.java

http://ontopia.googlecode.com/
Java | 46 lines | 33 code | 8 blank | 5 comment | 1 complexity | 0c3d3fd42467cfd648ebac739b344b90 MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0
  1. package net.ontopia.topicmaps.classify;
  2. import java.io.*;
  3. import java.util.*;
  4. import net.ontopia.xml.*;
  5. import net.ontopia.utils.*;
  6. import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
  7. import org.apache.poi.openxml4j.opc.OPCPackage;
  8. /**
  9. * INTERNAL: A format module for the OOXML PresentationML format.
  10. */
  11. public class OOXMLPowerpointFormatModule implements FormatModuleIF {
  12. protected String[] extensions = new String[] {".pptx"};
  13. // these are really magic bytes for all zip files...
  14. protected byte[] magicBytes = new byte[] {
  15. (byte) 0x50, (byte) 0x4B, (byte) 0x03, (byte) 0x04 };
  16. public boolean matchesContent(ClassifiableContentIF cc) {
  17. return false;
  18. }
  19. public boolean matchesIdentifier(ClassifiableContentIF cc) {
  20. boolean matches = FormatModule.matchesExtension(cc.getIdentifier(), extensions);
  21. if (!matches) return false;
  22. // name matches, then check office magic bytes
  23. return FormatModule.startsWith(cc.getContent(), magicBytes);
  24. }
  25. public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
  26. try {
  27. OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(cc.getContent()));
  28. XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(opc);
  29. String s = extractor.getText();
  30. char[] c = s.toCharArray();
  31. handler.startRegion("document");
  32. handler.text(c, 0, c.length);
  33. handler.endRegion();
  34. } catch (Exception e) {
  35. throw new OntopiaRuntimeException(e);
  36. }
  37. }
  38. }