PageRenderTime 5267ms CodeModel.GetById 31ms RepoModel.GetById 15ms app.codeStats 0ms

/ppt/scratchpad/src/org/apache/poi/hslf/dev/PPTXMLDump.java

https://github.com/isdom/POI-Android
Java | 255 lines | 170 code | 29 blank | 56 comment | 27 complexity | 3844f1d90c6ccd2d52a6152d13a6c673 MD5 | raw file
  1. /* ====================================================================
  2. Licensed to the Apache Software Foundation (ASF) under one or more
  3. contributor license agreements. See the NOTICE file distributed with
  4. this work for additional information regarding copyright ownership.
  5. The ASF licenses this file to You under the Apache License, Version 2.0
  6. (the "License"); you may not use this file except in compliance with
  7. the License. You may obtain a copy of the License at
  8. http://www.apache.org/licenses/LICENSE-2.0
  9. Unless required by applicable law or agreed to in writing, software
  10. distributed under the License is distributed on an "AS IS" BASIS,
  11. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. See the License for the specific language governing permissions and
  13. limitations under the License.
  14. ==================================================================== */
  15. package org.apache.poi.hslf.dev;
  16. import org.apache.poi.util.LittleEndian;
  17. import org.apache.poi.hslf.record.RecordTypes;
  18. import org.apache.poi.poifs.filesystem.*;
  19. import java.io.*;
  20. /**
  21. * Utility class which dumps raw contents of a ppt file into XML format
  22. *
  23. * @author Yegor Kozlov
  24. */
  25. public final class PPTXMLDump {
  26. public static final int HEADER_SIZE = 8; //size of the record header
  27. public static final int PICT_HEADER_SIZE = 25; //size of the picture header
  28. public final static String PPDOC_ENTRY = "PowerPoint Document";
  29. public final static String PICTURES_ENTRY = "Pictures";
  30. public static String CR = System.getProperty("line.separator");
  31. protected Writer out;
  32. protected byte[] docstream;
  33. protected byte[] pictstream;
  34. protected boolean hexHeader = true;
  35. public PPTXMLDump(File ppt) throws IOException {
  36. FileInputStream fis = new FileInputStream(ppt);
  37. POIFSFileSystem fs = new POIFSFileSystem(fis);
  38. fis.close();
  39. //read the document entry from OLE file system
  40. DocumentEntry entry = (DocumentEntry)fs.getRoot().getEntry(PPDOC_ENTRY);
  41. docstream = new byte[entry.getSize()];
  42. DocumentInputStream is = fs.createDocumentInputStream(PPDOC_ENTRY);
  43. is.read(docstream);
  44. try {
  45. entry = (DocumentEntry)fs.getRoot().getEntry(PICTURES_ENTRY);
  46. pictstream = new byte[entry.getSize()];
  47. is = fs.createDocumentInputStream(PICTURES_ENTRY);
  48. is.read(pictstream);
  49. } catch(FileNotFoundException e){
  50. //silently catch errors if the presentation does not contain pictures
  51. }
  52. }
  53. /**
  54. * Dump the structure of the supplied PPT file into XML
  55. * @param out <code>Writer</code> to write out
  56. * @throws java.io.IOException
  57. */
  58. public void dump(Writer out) throws IOException {
  59. this.out = out;
  60. int padding = 0;
  61. write(out, "<Presentation>" + CR, padding);
  62. padding++;
  63. if (pictstream != null){
  64. write(out, "<Pictures>" + CR, padding);
  65. dumpPictures(pictstream, padding);
  66. write(out, "</Pictures>" + CR, padding);
  67. }
  68. //dump the structure of the powerpoint document
  69. write(out, "<PowerPointDocument>" + CR, padding);
  70. padding++;
  71. dump(docstream, 0, docstream.length, padding);
  72. padding--;
  73. write(out, "</PowerPointDocument>" + CR, padding);
  74. padding--;
  75. write(out, "</Presentation>", padding);
  76. }
  77. /**
  78. * Dump a part of the document stream into XML
  79. * @param data PPT binary data
  80. * @param offset offset from the beginning of the document
  81. * @param length of the document
  82. * @param padding used for formatting results
  83. * @throws java.io.IOException
  84. */
  85. public void dump(byte[] data, int offset, int length, int padding) throws IOException {
  86. int pos = offset;
  87. while (pos <= (offset + length - HEADER_SIZE)){
  88. if (pos < 0) break;
  89. //read record header
  90. int info = LittleEndian.getUShort(data, pos);
  91. pos += LittleEndian.SHORT_SIZE;
  92. int type = LittleEndian.getUShort(data, pos);
  93. pos += LittleEndian.SHORT_SIZE;
  94. int size = (int)LittleEndian.getUInt(data, pos);
  95. pos += LittleEndian.INT_SIZE;
  96. //get name of the record by type
  97. String recname = RecordTypes.recordName(type);
  98. write(out, "<"+recname + " info=\""+info+"\" type=\""+type+"\" size=\""+size+"\" offset=\""+(pos-8)+"\"", padding);
  99. if (hexHeader){
  100. out.write(" header=\"");
  101. dump(out, data, pos-8, 8, 0, false);
  102. out.write("\"");
  103. }
  104. out.write(">" + CR);
  105. padding++;
  106. //this check works both for Escher and PowerPoint records
  107. boolean isContainer = (info & 0x000F) == 0x000F;
  108. if (isContainer) {
  109. //continue to dump child records
  110. dump(data, pos, size, padding);
  111. } else {
  112. //dump first 100 bytes of the atom data
  113. dump(out, data, pos, Math.min(size, data.length-pos), padding, true);
  114. }
  115. padding--;
  116. write(out, "</"+recname + ">" + CR, padding);
  117. pos += size;
  118. }
  119. }
  120. /**
  121. * Dumps the Pictures OLE stream into XML.
  122. *
  123. * @param data from the Pictures OLE data stream
  124. * @param padding
  125. * @throws java.io.IOException
  126. */
  127. public void dumpPictures(byte[] data, int padding) throws IOException {
  128. int pos = 0;
  129. while (pos < data.length) {
  130. byte[] header = new byte[PICT_HEADER_SIZE];
  131. System.arraycopy(data, pos, header, 0, header.length);
  132. int size = LittleEndian.getInt(header, 4) - 17;
  133. byte[] pictdata = new byte[size];
  134. System.arraycopy(data, pos + PICT_HEADER_SIZE, pictdata, 0, pictdata.length);
  135. pos += PICT_HEADER_SIZE + size;
  136. padding++;
  137. write(out, "<picture size=\""+size+"\" type=\""+getPictureType(header)+"\">" + CR, padding);
  138. padding++;
  139. write(out, "<header>" + CR, padding);
  140. dump(out, header, 0, header.length, padding, true);
  141. write(out, "</header>" + CR, padding);
  142. write(out, "<imgdata>" + CR, padding);
  143. dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true);
  144. write(out, "</imgdata>" + CR, padding);
  145. padding--;
  146. write(out, "</picture>" + CR, padding);
  147. padding--;
  148. }
  149. }
  150. public static void main(String[] args) throws Exception {
  151. if (args.length == 0){
  152. System.out.println(
  153. "Usage: PPTXMLDump (options) pptfile\n" +
  154. "Where options include:\n" +
  155. " -f write output to <pptfile>.xml file in the current directory"
  156. );
  157. return;
  158. }
  159. boolean outFile = false;
  160. for (int i = 0; i < args.length; i++){
  161. if (args[i].startsWith("-")) {
  162. if ("-f".equals(args[i])){
  163. //write ouput to a file
  164. outFile = true;
  165. }
  166. } else {
  167. File ppt = new File(args[i]);
  168. PPTXMLDump dump = new PPTXMLDump(ppt);
  169. System.out.println("Dumping " + args[i]);
  170. if (outFile){
  171. FileWriter out = new FileWriter(ppt.getName() + ".xml");
  172. dump.dump(out);
  173. out.close();
  174. } else {
  175. StringWriter out = new StringWriter();
  176. dump.dump(out);
  177. System.out.println(out.toString());
  178. }
  179. }
  180. }
  181. }
  182. /**
  183. * write a string to <code>out</code> with the specified padding
  184. */
  185. private static void write(Writer out, String str, int padding) throws IOException {
  186. for (int i = 0; i < padding; i++) out.write(" ");
  187. out.write(str);
  188. }
  189. private String getPictureType(byte[] header){
  190. String type;
  191. int meta = LittleEndian.getUShort(header, 0);
  192. switch(meta){
  193. case 0x46A0: type = "jpeg"; break;
  194. case 0x2160: type = "wmf"; break;
  195. case 0x6E00: type = "png"; break;
  196. default: type = "unknown"; break;
  197. }
  198. return type;
  199. }
  200. /**
  201. * dump binary data to <code>out</code> with the specified padding
  202. */
  203. private static void dump(Writer out, byte[] data, int offset, int length, int padding, boolean nl) throws IOException {
  204. int linesize = 25;
  205. for (int i = 0; i < padding; i++) out.write(" ");
  206. int i;
  207. for (i = offset; i < (offset + length); i++) {
  208. int c = data[i];
  209. out.write((char) hexval[(c & 0xF0) >> 4]);
  210. out.write((char) hexval[(c & 0x0F) >> 0]);
  211. out.write(' ');
  212. if((i+1-offset) % linesize == 0 && i != (offset + length-1)) {
  213. out.write(CR);
  214. for (int j = 0; j < padding; j++) out.write(" ");
  215. }
  216. }
  217. if(nl && length > 0)out.write(CR);
  218. }
  219. private static final byte hexval[] =
  220. {(byte) '0', (byte) '1', (byte) '2', (byte) '3',
  221. (byte) '4', (byte) '5', (byte) '6', (byte) '7',
  222. (byte) '8', (byte) '9', (byte) 'A', (byte) 'B',
  223. (byte) 'C', (byte) 'D', (byte) 'E', (byte) 'F'};
  224. }