PageRenderTime 51ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/src/edu/harvard/hul/ois/fits/tools/fileutility/FileUtility.java

https://gitlab.com/math4youbyusgroupillinois/fits
Java | 281 lines | 183 code | 30 blank | 68 comment | 43 complexity | c657f1d3cb7e1729eab6b001fc1ad5b6 MD5 | raw file
Possible License(s): GPL-3.0
  1. /*
  2. * Copyright 2009 Harvard University Library
  3. *
  4. * This file is part of FITS (File Information Tool Set).
  5. *
  6. * FITS is free software: you can redistribute it and/or modify
  7. * it under the terms of the GNU Lesser General Public License as published by
  8. * the Free Software Foundation, either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * FITS is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public License
  17. * along with FITS. If not, see <http://www.gnu.org/licenses/>.
  18. */
  19. package edu.harvard.hul.ois.fits.tools.fileutility;
  20. import java.io.File;
  21. import java.util.ArrayList;
  22. import java.util.Arrays;
  23. import java.util.List;
  24. import java.util.regex.Matcher;
  25. import java.util.regex.Pattern;
  26. import org.apache.commons.lang.StringEscapeUtils;
  27. import org.apache.log4j.Logger;
  28. import org.jdom.Document;
  29. import org.jdom.Element;
  30. import edu.harvard.hul.ois.fits.Fits;
  31. import edu.harvard.hul.ois.fits.exceptions.FitsToolCLIException;
  32. import edu.harvard.hul.ois.fits.exceptions.FitsToolException;
  33. import edu.harvard.hul.ois.fits.tools.ToolBase;
  34. import edu.harvard.hul.ois.fits.tools.ToolInfo;
  35. import edu.harvard.hul.ois.fits.tools.ToolOutput;
  36. import edu.harvard.hul.ois.fits.tools.utils.CommandLine;
  37. public class FileUtility extends ToolBase {
  38. private boolean osIsWindows = false;
  39. private boolean osHasTool = false;
  40. private List<String> WIN_COMMAND = new ArrayList<String>(Arrays.asList(Fits.FITS_TOOLS+"file_utility_windows/bin/file.exe"));
  41. private List<String> UNIX_COMMAND = new ArrayList<String>(Arrays.asList("file"));
  42. private List<String> FILE_TEST_COMMAND = new ArrayList<String>(Arrays.asList("which", "file"));
  43. private final static String WIN_FILE_DATE = "6/7/2008";
  44. private boolean enabled = true;
  45. private static final Logger logger = Logger.getLogger(FileUtility.class);
  46. public final static String xslt = Fits.FITS_XML+"fileutility/fileutility_to_fits.xslt";
  47. public FileUtility() throws FitsToolException{
  48. logger.debug ("Initializing FileUtility");
  49. String osName = System.getProperty("os.name");
  50. info = new ToolInfo();
  51. String versionOutput = null;
  52. List<String> infoCommand = new ArrayList<String>();
  53. info.setName("file utility");
  54. if (osName.startsWith("Windows")) {
  55. //use provided Windows File Utility
  56. osIsWindows = true;
  57. info.setDate(WIN_FILE_DATE);
  58. infoCommand.addAll(WIN_COMMAND);
  59. logger.debug("FileUtility will use Windows environment");
  60. }
  61. else if (testOSForCommand()){
  62. osHasTool = true;
  63. //use file command in operating system
  64. infoCommand.addAll(UNIX_COMMAND);
  65. logger.debug("FileUtility will use system command");
  66. }
  67. else {
  68. //Tool cannot be used on this system
  69. logger.error("File Utility cannot be used on this system");
  70. throw new FitsToolException("File Utility cannot be used on this system");
  71. }
  72. infoCommand.add("-v");
  73. versionOutput = CommandLine.exec(infoCommand,null);
  74. String[] lines = versionOutput.split("\n");
  75. String firstLine = lines[0];
  76. String[] nameVersion = firstLine.split("-");
  77. info.setVersion(nameVersion[nameVersion.length-1].trim());
  78. info.setNote(lines[1]);
  79. }
  80. public ToolOutput extractInfo(File file) throws FitsToolException {
  81. logger.debug("FileUtility.extractInfo starting");
  82. long startTime = System.currentTimeMillis();
  83. List<String> execCommand = new ArrayList<String>();
  84. if (osIsWindows) {
  85. //use provided Windows File Utility
  86. execCommand.addAll(WIN_COMMAND);
  87. } else {
  88. //use file command in operating system
  89. execCommand.addAll(UNIX_COMMAND);
  90. }
  91. execCommand.add("-b"); // omit file name in output
  92. if(info.getVersion().startsWith("5")) {
  93. execCommand.add("-e"); // exclude specified test
  94. execCommand.add("cdf"); // details of Compound Document Files
  95. }
  96. execCommand.add(file.getPath());
  97. String execOut = CommandLine.exec(execCommand,null);
  98. if(execOut != null && execOut.length() > 0) {
  99. execOut = execOut.trim();
  100. }
  101. else {
  102. execOut = "";
  103. }
  104. execCommand.add(1, "--mime"); // options must come before file path
  105. String execMimeOut = CommandLine.exec(execCommand,null);
  106. if(execMimeOut != null && execMimeOut.length() > 0) {
  107. execMimeOut = execMimeOut.trim();
  108. }
  109. else {
  110. execMimeOut = "";
  111. }
  112. String format = null;
  113. String mime = null;
  114. String charset = null;
  115. List<String> linebreaks = new ArrayList<String>();
  116. //if mime indicates plain text
  117. if(execMimeOut.startsWith("text/") && execMimeOut.contains("charset=")) {
  118. //mime = "text/plain";
  119. mime = execMimeOut.substring(0,execMimeOut.indexOf("; charset="));
  120. charset = execMimeOut.substring(execMimeOut.indexOf("=")+1);
  121. charset = charset.toUpperCase();
  122. /*if(execOut.contains("ASCII text") ||
  123. execOut.contains("Unicode text, UTF-32") ||
  124. execOut.contains("UTF-8 Unicode") ||
  125. execOut.contains("UTF-16 Unicode") ||
  126. execOut.contains("Non-ISO extended-ASCII text") ||
  127. execOut.contains("ISO-8859")) {
  128. format = "Plain text";
  129. }*/
  130. format = "Plain text";
  131. Pattern p = Pattern.compile("(.*) with (.*) line terminators");
  132. Matcher m = p.matcher(execOut);
  133. if(m.matches()) {
  134. String endings = m.group(2);
  135. String[] breaks = endings.split(",");
  136. for(String b : breaks) {
  137. if(b.equals("CRLF")) {
  138. b = "CR/LF";
  139. }
  140. linebreaks.add(b);
  141. }
  142. }
  143. }
  144. else if(execMimeOut.contains("charset=")) {
  145. format = execOut;
  146. mime = execMimeOut.substring(0,execMimeOut.indexOf("; charset="));
  147. }
  148. //else use output for format
  149. else {
  150. format = execOut;
  151. mime = execMimeOut;
  152. }
  153. Document rawOut = createXml(mime,format,charset,linebreaks,execOut+"\n"+execMimeOut);
  154. Document fitsXml = transform(xslt,rawOut);
  155. /*
  156. XMLOutputter outputter = new XMLOutputter(Format.getPrettyFormat());
  157. try {
  158. outputter.output(fitsXml, System.out);
  159. } catch (IOException e) {
  160. // TODO Auto-generated catch block
  161. e.printStackTrace();
  162. }
  163. */
  164. output = new ToolOutput(this,fitsXml,rawOut);
  165. duration = System.currentTimeMillis()-startTime;
  166. runStatus = RunStatus.SUCCESSFUL;
  167. logger.debug("FileUtility.extractInfo finished");
  168. return output;
  169. }
  170. public boolean testOSForCommand() throws FitsToolCLIException {
  171. String output = CommandLine.exec(FILE_TEST_COMMAND,null);
  172. if(output == null || output.length() == 0) {
  173. return false;
  174. }
  175. else {
  176. return true;
  177. }
  178. }
  179. private Document createXml(String mime_s, String format_s, String charset_s, List<String> linebreaks, String rawOutput_s) throws FitsToolException {
  180. //xml root
  181. Element root = new Element("fileUtilityOutput");
  182. //rawoutput
  183. Element rawOutput = new Element("rawOutput");
  184. rawOutput.setText(stripNonValidXMLCharacters(rawOutput_s));
  185. root.addContent(rawOutput);
  186. //mimetype
  187. Element mime = new Element("mimetype");
  188. mime.setText(mime_s);
  189. root.addContent(mime);
  190. //format
  191. Element format = new Element("format");
  192. format.setText(stripNonValidXMLCharacters(format_s));
  193. root.addContent(format);
  194. //charset
  195. if(charset_s != null) {
  196. Element charset = new Element("charset");
  197. charset.setText(charset_s);
  198. root.addContent(charset);
  199. }
  200. if(linebreaks.size() > 0) {
  201. for(String l : linebreaks) {
  202. Element linebreak = new Element("linebreak");
  203. linebreak.setText(l);
  204. root.addContent(linebreak);
  205. }
  206. }
  207. return new Document(root);
  208. }
  209. /*
  210. public boolean isIdentityKnown(FileIdentity identity) {
  211. //identity and mimetype must not be null or empty strings for an identity to be "known"
  212. if(identity == null
  213. || identity.getMime() == null
  214. || identity.getMime().length() == 0
  215. || identity.getFormat() == null
  216. || identity.getFormat().length() == 0) {
  217. return false;
  218. }
  219. String format = identity.getFormat();
  220. String mime = identity.getMime();
  221. if(format.equals("data") || format.equals("Unknown Binary") || mime.equals("application/octet-stream")) {
  222. return false;
  223. }
  224. else {
  225. return true;
  226. }
  227. }
  228. */
  229. public boolean isEnabled() {
  230. return enabled;
  231. }
  232. public void setEnabled(boolean value) {
  233. enabled = value;
  234. }
  235. public String stripNonValidXMLCharacters(String in) {
  236. StringBuffer out = new StringBuffer(); // Used to hold the output.
  237. char current; // Used to reference the current character.
  238. if (in == null || ("".equals(in))) return ""; // vacancy test.
  239. for (int i = 0; i < in.length(); i++) {
  240. current = in.charAt(i); // NOTE: No IndexOutOfBoundsException caught here; it should not happen.
  241. if ((current == 0x9) ||
  242. (current == 0xA) ||
  243. (current == 0xD) ||
  244. ((current >= 0x20) && (current <= 0xD7FF)) ||
  245. ((current >= 0xE000) && (current <= 0xFFFD)) ||
  246. ((current >= 0x10000) && (current <= 0x10FFFF)))
  247. out.append(current);
  248. }
  249. return out.toString();
  250. }
  251. }