PageRenderTime 56ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/src/main/java/es/uvigo/darwin/prottest/exe/PhyMLv3AminoAcidRunEstimator.java

https://bitbucket.org/diegodl/prottest3
Java | 435 lines | 292 code | 53 blank | 90 comment | 55 complexity | d93d620a0b3de455e45df2ba173f7c17 MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. Copyright (C) 2009 Diego Darriba
  3. This program is free software; you can redistribute it and/or modify
  4. it under the terms of the GNU General Public License as published by
  5. the Free Software Foundation; either version 2 of the License, or
  6. (at your option) any later version.
  7. This program is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU General Public License for more details.
  11. You should have received a copy of the GNU General Public License
  12. along with this program; if not, write to the Free Software
  13. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  14. */
  15. package es.uvigo.darwin.prottest.exe;
  16. import java.io.BufferedReader;
  17. import java.io.File;
  18. import java.io.FileReader;
  19. import java.io.IOException;
  20. import java.io.InputStreamReader;
  21. import pal.tree.ReadTree;
  22. import pal.tree.TreeParseException;
  23. import es.uvigo.darwin.prottest.exe.util.TemporaryFileManager;
  24. import static es.uvigo.darwin.prottest.global.AminoAcidApplicationGlobals.*;
  25. import es.uvigo.darwin.prottest.global.options.ApplicationOptions;
  26. import es.uvigo.darwin.prottest.model.AminoAcidModel;
  27. import es.uvigo.darwin.prottest.model.Model;
  28. import es.uvigo.darwin.prottest.util.Utilities;
  29. import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException;
  30. import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.OSNotSupportedException;
  31. import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.PhyMLExecutionException;
  32. import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.StatsFileFormatException;
  33. import es.uvigo.darwin.prottest.util.exception.ModelOptimizationException.ModelNotFoundException;
  34. import es.uvigo.darwin.prottest.util.exception.ProtTestInternalException;
  35. import es.uvigo.darwin.prottest.util.exception.TreeFormatException;
  36. import java.util.Arrays;
  37. /**
  38. * The Class PhyMLAminoAcidRunEstimator. It optimizes Amino-Acid
  39. * model parameters using PhyML 3.0.
  40. *
  41. * @author Federico Abascal
  42. * @author Diego Darriba
  43. * @since 3.0
  44. */
  45. public class PhyMLv3AminoAcidRunEstimator extends AminoAcidRunEstimator {
  46. /** The PhyML implemented matrices. */
  47. public static String[] IMPLEMENTED_MATRICES = {
  48. "JTT",
  49. "LG",
  50. "DCMut",
  51. "MtREV",
  52. "MtMam",
  53. "MtArt",
  54. "Dayhoff",
  55. "WAG",
  56. "RtREV",
  57. "CpREV",
  58. "Blosum62",
  59. "VT",
  60. "HIVb",
  61. "HIVw"
  62. };
  63. /** Suffix for temporary statistic files. */
  64. private static final String STATS_FILE_SUFFIX = "_phyml_stats.txt";
  65. /** Suffix for temporary tree files. */
  66. private static final String TREE_FILE_SUFFIX = "_phyml_tree.txt";
  67. /** Alignment filename. */
  68. private String workAlignment;
  69. /** Custom model substitution matrix file**/
  70. private File modelFile;
  71. /**
  72. * Instantiates a new optimizer for amino-acid models
  73. * using PhyML.
  74. *
  75. * @param options the application options instance
  76. * @param model the amino-acid model to optimize
  77. */
  78. public PhyMLv3AminoAcidRunEstimator(ApplicationOptions options, Model model) {
  79. this(options, model, 1);
  80. }
  81. /**
  82. * Instantiates a new optimizer for amino-acid models
  83. * using PhyML.
  84. *
  85. * @param options the application options instance
  86. * @param model the amino-acid model to optimize
  87. * @param numberOfThreads the number of threads to use in the optimization
  88. */
  89. public PhyMLv3AminoAcidRunEstimator(ApplicationOptions options, Model model, int numberOfThreads) {
  90. super(options, model, numberOfThreads);
  91. this.numberOfCategories = options.ncat;
  92. try {
  93. this.model = (AminoAcidModel) model;
  94. // check if there is any matrix file
  95. modelFile = new File("models" + File.separator + model.getMatrix());
  96. } catch (ClassCastException cce) {
  97. throw new ProtTestInternalException("Wrong model type");
  98. }
  99. }
  100. /* (non-Javadoc)
  101. * @see es.uvigo.darwin.prottest.exe.RunEstimator#optimizeModel(es.uvigo.darwin.prottest.global.options.ApplicationOptions)
  102. */
  103. @Override
  104. public boolean runEstimator()
  105. throws ModelOptimizationException {
  106. //let's call Phyml with the proper command line
  107. this.workAlignment = TemporaryFileManager.getInstance().getAlignmentFilename(Thread.currentThread());
  108. String inv = "0.0";
  109. if (model.isInv()) {
  110. inv = "e";
  111. }
  112. String rateCathegories = "1";
  113. String alpha = "";
  114. if (model.isGamma()) {
  115. rateCathegories = "" + model.getNumberOfTransitionCategories();//options.ncat;
  116. alpha = "e";
  117. }
  118. String tr = "BIONJ";
  119. String F = "d";
  120. if (model.isPlusF()) {
  121. F = "e";
  122. }
  123. String topo = "lr";
  124. switch (options.strategyMode) {
  125. case OPTIMIZE_BIONJ:
  126. tr = "BIONJ";
  127. topo = "l";
  128. break;
  129. case OPTIMIZE_FIXED_BIONJ:
  130. if (TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread()) != null) {
  131. tr = TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread());
  132. topo = "l";
  133. } else {
  134. topo = "r";
  135. }
  136. break;
  137. case OPTIMIZE_ML:
  138. topo = "tl";
  139. break;
  140. case OPTIMIZE_USER:
  141. tr = TemporaryFileManager.getInstance().getTreeFilename(Thread.currentThread());
  142. topo = "l";
  143. }
  144. try {
  145. Runtime runtime = Runtime.getRuntime();
  146. String str[] = new String[29];
  147. for (int i = 0; i < str.length; i++) {
  148. str[i] = "";
  149. }
  150. java.io.File currentDir = new java.io.File("");
  151. File phymlBin = new File(currentDir.getAbsolutePath() + "/bin/phyml");
  152. String phymlBinName;
  153. if (phymlBin.exists() && phymlBin.canExecute()) {
  154. phymlBinName = phymlBin.getAbsolutePath();
  155. } else {
  156. phymlBinName = currentDir.getAbsolutePath() + "/bin/" + getPhymlVersion();
  157. }
  158. if (phymlBinName != null) {
  159. // phyml -i seq_file_name -d aa ¿-q? -f d/e (d para -F y e para +F) -v 0/e (para -I o +I) -a e (para estimar alpha)
  160. // -c 0/4/8 (num rate categories) -u user_tree_file (opcional)
  161. // -o tlr/lr (dependiendo de si optimizamos la topología o no)
  162. // -m WAG (default) | JTT | MtREV | Dayhoff | DCMut | RtREV | CpREV | VT | Blosum62 | MtMam | MtArt | HIVw | HIVb | custom
  163. str[0] = phymlBinName;
  164. // input alignment
  165. str[4] = "-i";
  166. str[5] = workAlignment;
  167. // number of rate categories
  168. str[6] = "-c";
  169. str[7] = rateCathegories;
  170. // the model
  171. str[8] = "-m";
  172. if (!Arrays.asList(IMPLEMENTED_MATRICES).contains(model.getMatrix())) {
  173. // check matrix file
  174. if (!modelFile.exists()) {
  175. throw new ModelNotFoundException(model.getMatrix());
  176. }
  177. str[9] = "custom";
  178. str[27] = "--aa_rate_file";
  179. str[28] = modelFile.getAbsolutePath();
  180. } else {
  181. str[9] = model.getMatrix();
  182. }
  183. // proportion of invariable sites
  184. str[10] = "-v";
  185. str[11] = inv;
  186. // value of the gamma shape parameter (if gamma distribution)
  187. if (!alpha.equals("")) {
  188. str[12] = "-a";
  189. str[13] = alpha;
  190. }
  191. // topology optimization
  192. str[14] = "-o";
  193. str[15] = topo;
  194. // amino-acid frequencies
  195. str[16] = "-f";
  196. str[17] = F;
  197. // starting tree file
  198. if (!tr.equals("BIONJ")) {
  199. str[18] = "-u";
  200. str[19] = tr;
  201. }
  202. // data type
  203. str[20] = "-d";
  204. str[21] = "aa";
  205. // bootstrapping
  206. str[22] = "-b";
  207. str[23] = "0";
  208. if (APPLICATION_PROPERTIES.getProperty("phyml_thread_scheduling", "false").equalsIgnoreCase("true")) {
  209. str[24] = "--num_threads";
  210. str[25] = String.valueOf(numberOfThreads);
  211. }
  212. if (APPLICATION_PROPERTIES.getProperty("no-memory-check", "no").equalsIgnoreCase("yes")) {
  213. str[26] = "--no_memory_check";
  214. }
  215. model.setCommandLine(str);
  216. proc = runtime.exec(str);
  217. proc.getOutputStream().write(modelFile.getPath().getBytes());
  218. ExternalExecutionManager.getInstance().addProcess(proc);
  219. } else {
  220. OSNotSupportedException e =
  221. new OSNotSupportedException("PhyML");
  222. throw e;
  223. }
  224. proc.getOutputStream().close();
  225. StreamGobbler errorGobbler = new PhymlStreamGobbler(new InputStreamReader(proc.getErrorStream()), "Phyml-Error", true, RunEstimator.class);
  226. StreamGobbler outputGobbler = new PhymlStreamGobbler(new InputStreamReader(proc.getInputStream()), "Phyml-Output", true, RunEstimator.class);
  227. errorGobbler.start();
  228. outputGobbler.start();
  229. int exitVal = proc.waitFor();
  230. ExternalExecutionManager.getInstance().removeProcess(proc);
  231. pfine("Phyml's command-line: ");
  232. for (int i = 0; i < str.length; i++) {
  233. pfine(str[i] + " ");
  234. }
  235. pfineln("");
  236. if (exitVal != 0) {
  237. errorln("Phyml's exit value: " + exitVal + " (there was probably some error)");
  238. error("Phyml's command-line: ");
  239. for (int i = 0; i < str.length; i++) {
  240. error(str[i] + " ");
  241. }
  242. errorln("");
  243. errorln("Please, take a look at the Phyml's log below:");
  244. String line;
  245. try {
  246. FileReader input = new FileReader(TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
  247. BufferedReader br = new BufferedReader(input);
  248. while ((line = br.readLine()) != null) {
  249. errorln(line);
  250. }
  251. } catch (IOException e) {
  252. errorln("Unable to read the log file: " + TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
  253. }
  254. }
  255. } catch (InterruptedException e) {
  256. throw new PhyMLExecutionException("Interrupted execution: " + e.getMessage());
  257. } catch (IOException e) {
  258. throw new PhyMLExecutionException("I/O error: " + e.getMessage());
  259. }
  260. if (!(readStatsFile() && readTreeFile())) {
  261. return false;
  262. }
  263. return true;
  264. }
  265. /**
  266. * Read the temporary statistics file.
  267. *
  268. * @return true, if successful
  269. */
  270. private boolean readStatsFile()
  271. throws ModelOptimizationException {
  272. String line;
  273. try {
  274. FileReader input = new FileReader(workAlignment + STATS_FILE_SUFFIX);
  275. BufferedReader br = new BufferedReader(input);
  276. while ((line = br.readLine()) != null) {
  277. pfinerln("[DEBUG] PHYML: " + line);
  278. if (line.length() > 0) {
  279. if (line.startsWith(". Model of amino acids substitution")) {
  280. String matrixName = Utilities.lastToken(line);
  281. //TODO: This line exists due to a bug in the phyml latest versions
  282. // where the HIVw matrix is shown as HIVb in the stats file
  283. if (!model.getMatrix().equals("HIVw"))
  284. if (!(model.getMatrix().equals(matrixName) || (modelFile.exists() &&
  285. !Arrays.asList(IMPLEMENTED_MATRICES).contains(model.getMatrix())))) {
  286. String errorMsg = "Matrix names doesn't match";
  287. errorln("PHYML: " + line);
  288. errorln("Last token: " + Utilities.lastToken(line));
  289. errorln("It should be: " + model.getMatrix());
  290. errorln(errorMsg);
  291. throw new ModelNotFoundException(model.getMatrix());
  292. }
  293. } else if (line.startsWith(". Log-likelihood")) {
  294. model.setLk(Double.parseDouble(Utilities.lastToken(line)));
  295. } else if (line.startsWith(". Discrete gamma model")) {
  296. if (Utilities.lastToken(line).equals("Yes")) {
  297. line = br.readLine();
  298. pfinerln("[DEBUG] PHYML: " + line);
  299. if (model.getNumberOfTransitionCategories() != Integer.parseInt(Utilities.lastToken(line))) {
  300. String errorMsg = "There were errors in the number of transition categories: " + model.getNumberOfTransitionCategories() + " vs " + Integer.parseInt(Utilities.lastToken(line));
  301. errorln(errorMsg);
  302. throw new StatsFileFormatException("PhyML", errorMsg);
  303. //prottest.setCurrentModel(-2);
  304. }
  305. line = br.readLine();
  306. pfinerln("[DEBUG] PHYML: " + line);
  307. model.setAlpha(Double.parseDouble(Utilities.lastToken(line)));
  308. }
  309. } else if (line.startsWith(". Proportion of invariant:")) {
  310. model.setInv(Double.parseDouble(Utilities.lastToken(line)));
  311. } else if (line.startsWith(". Time used")) {
  312. time = Utilities.lastToken(line);
  313. }
  314. }
  315. }
  316. } catch (IOException e) {
  317. throw new StatsFileFormatException("PhyML", e.getMessage());
  318. }
  319. return true;
  320. }
  321. /**
  322. * Read the temporary tree file.
  323. *
  324. * @return true, if successful
  325. *
  326. * @throws TreeFormatException the tree format exception
  327. */
  328. private boolean readTreeFile()
  329. throws TreeFormatException {
  330. try {
  331. model.setTree(new ReadTree(workAlignment + TREE_FILE_SUFFIX));
  332. } catch (TreeParseException e) {
  333. String errorMsg = "ProtTest: wrong tree format, exiting...";
  334. throw new TreeFormatException(errorMsg);
  335. } catch (IOException e) {
  336. String errorMsg = "Error: File not found (IO error), exiting...";
  337. throw new TreeFormatException(errorMsg);
  338. }
  339. return true;
  340. }
  341. /**
  342. * Delete temporary files.
  343. *
  344. * @return true, if successful
  345. */
  346. @Override
  347. protected boolean deleteTemporaryFiles() {
  348. File f;
  349. f = new File(workAlignment + STATS_FILE_SUFFIX);
  350. f.delete();
  351. f = new File(workAlignment + TREE_FILE_SUFFIX);
  352. f.delete();
  353. f = new File(TemporaryFileManager.getInstance().getLogFilename(Thread.currentThread()));
  354. f.delete();
  355. return true;
  356. }
  357. /**
  358. * Gets the PhyML executable name for the current Operating System.
  359. *
  360. * @return the PhyML executable name
  361. */
  362. private String getPhymlVersion() {
  363. String os = System.getProperty("os.name");
  364. if (os.startsWith("Mac")) {
  365. String oa = System.getProperty("os.arch");
  366. if (oa.startsWith("ppc")) {
  367. return "phyml-prottest-macppc";
  368. } else {
  369. return "phyml-prottest-macintel";
  370. }
  371. } else if (os.startsWith("Linux")) {
  372. return "phyml-prottest-linux";
  373. } else if (os.startsWith("Window")) {
  374. return "phyml-prottest-windows.exe";
  375. } else {
  376. return null;
  377. }
  378. }
  379. }