PageRenderTime 4597ms CodeModel.GetById 27ms RepoModel.GetById 2ms app.codeStats 0ms

/src/main/java/com/marimon/Msg2Txt.java

https://bitbucket.org/ignasi35/msg-reader
Java | 191 lines | 117 code | 17 blank | 57 comment | 17 complexity | 60eb9b0468e7f4ac0d8c051c88c1f214 MD5 | raw file
  1. package com.marimon;
  2. /* ====================================================================
  3. Licensed to the Apache Software Foundation (ASF) under one or more
  4. contributor license agreements. See the NOTICE file distributed with
  5. this work for additional information regarding copyright ownership.
  6. The ASF licenses this file to You under the Apache License, Version 2.0
  7. (the "License"); you may not use this file except in compliance with
  8. the License. You may obtain a copy of the License at
  9. http://www.apache.org/licenses/LICENSE-2.0
  10. Unless required by applicable law or agreed to in writing, software
  11. distributed under the License is distributed on an "AS IS" BASIS,
  12. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. See the License for the specific language governing permissions and
  14. limitations under the License.
  15. ==================================================================== */
  16. import java.io.File;
  17. import java.io.FileOutputStream;
  18. import java.io.IOException;
  19. import java.io.OutputStream;
  20. import java.io.PrintWriter;
  21. import org.apache.poi.hsmf.MAPIMessage;
  22. import org.apache.poi.hsmf.datatypes.AttachmentChunks;
  23. import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
  24. /**
  25. * Reads one or several Outlook MSG files and for each of them creates a text
  26. * file from available chunks and a directory that contains attachments.
  27. *
  28. * @author Bruno Girin
  29. */
  30. public class Msg2Txt {
  31. /**
  32. * The stem used to create file names for the text file and the directory
  33. * that contains the attachments.
  34. */
  35. private String fileNameStem;
  36. /**
  37. * The Outlook MSG file being processed.
  38. */
  39. private final MAPIMessage msg;
  40. public Msg2Txt(final String fileName) throws IOException {
  41. fileNameStem = fileName;
  42. if (fileNameStem.endsWith(".msg") || fileNameStem.endsWith(".MSG")) {
  43. fileNameStem = fileNameStem.substring(0, fileNameStem.length() - 4);
  44. }
  45. msg = new MAPIMessage(fileName);
  46. }
  47. /**
  48. * Processes the message.
  49. *
  50. * @throws IOException
  51. * if an exception occurs while writing the message out
  52. */
  53. public void processMessage() throws IOException {
  54. String txtFileName = fileNameStem + ".txt";
  55. String attDirName = fileNameStem + "-att";
  56. PrintWriter txtOut = null;
  57. try {
  58. String[] headers;
  59. try {
  60. headers = msg.getHeaders();
  61. for (String string : headers) {
  62. if (string.contains("From:")) {
  63. System.out.println(string);
  64. }
  65. }
  66. } catch (ChunkNotFoundException e1) {
  67. }
  68. txtOut = new PrintWriter(txtFileName);
  69. try {
  70. String displayFrom = msg.getDisplayFrom();
  71. txtOut.println("From: " + displayFrom);
  72. } catch (ChunkNotFoundException e) {
  73. // ignore
  74. }
  75. try {
  76. String displayTo = msg.getDisplayTo();
  77. txtOut.println("To: " + displayTo);
  78. } catch (ChunkNotFoundException e) {
  79. // ignore
  80. }
  81. try {
  82. String displayCC = msg.getDisplayCC();
  83. txtOut.println("CC: " + displayCC);
  84. } catch (ChunkNotFoundException e) {
  85. // ignore
  86. }
  87. try {
  88. String displayBCC = msg.getDisplayBCC();
  89. txtOut.println("BCC: " + displayBCC);
  90. } catch (ChunkNotFoundException e) {
  91. // ignore
  92. }
  93. try {
  94. String subject = msg.getSubject();
  95. txtOut.println("Subject: " + subject);
  96. } catch (ChunkNotFoundException e) {
  97. // ignore
  98. }
  99. try {
  100. String body = msg.getTextBody();
  101. txtOut.println(body);
  102. } catch (ChunkNotFoundException e) {
  103. System.err.println("No message body");
  104. }
  105. AttachmentChunks[] attachments = msg.getAttachmentFiles();
  106. if (attachments.length > 0) {
  107. File d = new File(attDirName);
  108. if (d.mkdir()) {
  109. for (AttachmentChunks attachment : attachments) {
  110. processAttachment(attachment, d);
  111. }
  112. } else {
  113. System.err.println("Can't create directory " + attDirName);
  114. }
  115. }
  116. } finally {
  117. if (txtOut != null) {
  118. txtOut.close();
  119. }
  120. }
  121. }
  122. /**
  123. * Processes a single attachment: reads it from the Outlook MSG file and
  124. * writes it to disk as an individual file.
  125. *
  126. * @param attachment
  127. * the chunk group describing the attachment
  128. * @param dir
  129. * the directory in which to write the attachment file
  130. * @throws IOException
  131. * when any of the file operations fails
  132. */
  133. public void processAttachment(final AttachmentChunks attachment,
  134. final File dir) throws IOException {
  135. String fileName = attachment.attachFileName.toString();
  136. if (attachment.attachLongFileName != null) {
  137. fileName = attachment.attachLongFileName.toString();
  138. }
  139. File f = new File(dir, fileName);
  140. OutputStream fileOut = null;
  141. try {
  142. fileOut = new FileOutputStream(f);
  143. fileOut.write(attachment.attachData.getValue());
  144. } finally {
  145. if (fileOut != null) {
  146. fileOut.close();
  147. }
  148. }
  149. }
  150. /**
  151. * Processes the list of arguments as a list of names of Outlook MSG files.
  152. *
  153. * @param args
  154. * the list of MSG files to process
  155. */
  156. public static void main(final String[] args) {
  157. if (args.length <= 0) {
  158. System.err.println("No files names provided");
  159. } else {
  160. for (int i = 0; i < args.length; i++) {
  161. try {
  162. Msg2Txt processor = new Msg2Txt(args[i]);
  163. processor.processMessage();
  164. System.out.println("file processed!");
  165. } catch (IOException e) {
  166. System.err.println("Could not process " + args[i] + ": "
  167. + e);
  168. }
  169. }
  170. }
  171. }
  172. }