PageRenderTime 54ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/Program.cs

https://bitbucket.org/peterk87/mist
C# | 307 lines | 196 code | 39 blank | 72 comment | 26 complexity | 0ed99eb82d1fddbe774fad2e27f33afd MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.IO;
  5. using System.Linq;
  6. using System.Threading;
  7. using NDesk.Options;
  8. using fastJSON;
  9. namespace MIST
  10. {
  11. internal class Program
  12. {
  13. private static bool _beautify;
  14. public static bool OutputAlleleSequences;
  /// <summary>
  /// Program entry point: parses the command-line options, validates the input files,
  /// runs the in silico typing analysis and writes the results as JSON.
  /// </summary>
  /// <param name="args">Command-line options followed by the genome multifasta file paths.</param>
  private static void Main(string[] args)
  {
      bool showHelp = false;
      var testFiles = new List<string>();
      // string output = "-";
      var inputGenomes = new List<string>();
      var allelesFolders = new List<string>();
      int cores = 1;
      string jsonOutput = "-";

      // Defaults; the options below may override them.
      Misc.BlastWordSize = 7;
      Misc.BlastErrorRetryLimit = 1;
      Misc.AlignmentLengthCoverage = 0.8;
      Misc.TempDir = Path.GetTempPath();

      var p = new OptionSet
          {
              {"t|test=", "Test info file. Typing test to perform.", s => testFiles.Add(s)},
              {"a|allele-dir=", "Alleles directory", s => allelesFolders.Add(s)},
              {"b|beautify", "Make your JSON beautiful!", s => _beautify = s != null},
              // {"o|results-out=", "Output filename for results. Default (-)/stdout.", s => output = s},
              {"l|alignment-length-coverage=", "Alignment length coverage threshold for amplicon probes (Default = 0.8)", (double s) => Misc.AlignmentLengthCoverage = s},
              {"w|wordsize=", "BLAST word size for PCR based tests. Default=7", (int s) => Misc.BlastWordSize = s},
              {"T|temp-dir=", "Directory to save temporary data (e.g. BLAST output and BLAST databases)", s => Misc.TempDir = s},
              {"c|cores=", "Number of cores to allow MIST to utilize (default=1)", (int s) => cores = s},
              {"e|blast-error-retries=", "Number of times MIST should retry running BLAST in case of output error.", (int s) => Misc.BlastErrorRetryLimit = s},
              {"j|json-out=", "json output filename. Default (-)/stdout.", s => jsonOutput = s},
              {"A|output-allele-sequences", "Output the allele sequences in the JSON output under TestMarkers", s => OutputAlleleSequences = s != null},
              {"h|help", "show this message and exit", s => showHelp = s != null},
          };

      // Parse the options; the remaining arguments are the genome multifasta filenames.
      try
      {
          List<string> genomeFilenames = p.Parse(args);

          // Answer a help request (or a bare invocation) before validating the inputs;
          // otherwise asking for help is reported as a "no multifasta input" error.
          if (showHelp || args.Length == 0)
          {
              ShowHelp(p);
              return;
          }

          foreach (string genomeFilename in genomeFilenames)
          {
              if (File.Exists(genomeFilename))
              {
                  inputGenomes.Add(genomeFilename);
              }
              else
              {
                  // Missing genomes are reported and skipped; the run continues with the rest.
                  Console.Error.WriteLine(string.Format("Genome multifasta file '{0}' not found.", genomeFilename));
              }
          }
          if (inputGenomes.Count == 0)
          {
              throw new ArgumentException("No multifasta input(s) specified. MIST could not run.");
          }
      }
      catch (Exception e)
      {
          Console.Error.Write("MIST argument exception: ");
          Console.Error.WriteLine(e.Message);
          Console.Error.WriteLine("Try `MIST --help' for more information.");
          ShowHelp(p);
          return;
      }

      // A non-positive core count is treated as "use every processor" so that the value
      // stored in Misc.Cores, the thread pool limits and the final report all agree.
      if (cores < 1)
      {
          cores = Environment.ProcessorCount;
      }
      if (cores > 1)
      {
          ConfigureThreadPool(cores);
      }
      Misc.Cores = cores;

      // Only keep test info files that exist.
      var testFileInfo = new List<FileInfo>();
      foreach (string testFile in testFiles)
      {
          var fileInfo = new FileInfo(testFile);
          if (fileInfo.Exists)
          {
              testFileInfo.Add(fileInfo);
          }
          else
          {
              Console.Error.WriteLine(string.Format("Test info file '{0}' not found!", testFile));
          }
      }
      if (testFileInfo.Count == 0)
      {
          // Reported on stderr because stdout is the default destination of the JSON results.
          Console.Error.WriteLine("No valid in silico test specified. MIST could not run.");
          return;
      }

      var allelesDirInfos = allelesFolders.Select(folder => new DirectoryInfo(folder)).ToList();
      var sw = Stopwatch.StartNew();

      // Get (and create if necessary) the temporary data directory.
      var tempDirInfo = new DirectoryInfo(Misc.TempDir);
      if (!tempDirInfo.Exists)
      {
          tempDirInfo.Create();
      }

      var ist = new InSilicoTyping(testFileInfo, allelesDirInfos, tempDirInfo);

      // For each test, look for a sibling file with the same name and a ".txt" extension.
      // Path.ChangeExtension only touches the final extension; a plain string replace would
      // also rewrite matching text elsewhere in the path and fails on extension-less files.
      var extraTestInfos = (from pair in ist.TestNameTestInfoFile
                            let testName = pair.Key
                            let markersFileInfo = pair.Value
                            let extraTestInfoFile = new FileInfo(Path.ChangeExtension(markersFileInfo.FullName, ".txt"))
                            where extraTestInfoFile.Exists
                            select new ExtraTestInfo(ist.Markers, extraTestInfoFile, testName) into extraTestInfo
                            where extraTestInfo.Read()
                            select extraTestInfo).ToList();

      // Add each ExtraTestInfo object to the InSilicoTyping object.
      foreach (ExtraTestInfo extraTestInfo in extraTestInfos)
      {
          ist.AddExtraInfo(extraTestInfo);
      }

      ist.AddGenomeFilesToAnalysis(inputGenomes);
      RunMIST(ist, inputGenomes, cores);

      // TODO: make JSON output optional
      // The stream is closed by WriteJSONOutput when it disposes its writer.
      WriteJSONOutput(ist, jsonOutput == "-" ? Console.OpenStandardOutput() : new FileStream(jsonOutput, FileMode.Create));
      // TODO: make simple table summary output optional
      //ist.WriteResults(output, true);

      sw.Stop();
      Console.Error.WriteLine(string.Format("MIST ran for {0} ms with {1} cores",
                                            sw.ElapsedMilliseconds,
                                            cores));
  }

  /// <summary>
  /// Sets both the minimum and the maximum number of thread pool worker threads to
  /// <paramref name="cores"/>, leaving the completion-port thread limits unchanged.
  /// </summary>
  /// <param name="cores">Requested number of worker threads.</param>
  private static void ConfigureThreadPool(int cores)
  {
      int maxWorkerThreads;
      int maxCompletionThreads;
      ThreadPool.GetMaxThreads(out maxWorkerThreads, out maxCompletionThreads);
      int minWorkerThreads;
      int minCompletionThreads;
      ThreadPool.GetMinThreads(out minWorkerThreads, out minCompletionThreads);

      ThreadPool.SetMinThreads(cores, minCompletionThreads);
      // NOTE(review): the return value is ignored, as before. The ThreadPool documentation
      // says the maximum cannot be set below the machine's processor count, so a smaller
      // "cores" value may not actually cap the pool - confirm whether that matters here.
      ThreadPool.SetMaxThreads(cores, maxCompletionThreads);
  }
  /// <summary>
  /// Serialises the typing results of <paramref name="ist"/> to JSON and writes them,
  /// followed by a line terminator, to <paramref name="jsonOutputStream"/>.
  /// </summary>
  /// <param name="ist">Typing object whose results are wrapped in a TypingResultsCollection.</param>
  /// <param name="jsonOutputStream">Destination stream; it is closed when the writer is disposed.</param>
  private static void WriteJSONOutput(InSilicoTyping ist, Stream jsonOutputStream)
  {
      using (var writer = new StreamWriter(jsonOutputStream))
      {
          var serialized = JSON.Instance.ToJSON(new TypingResultsCollection(ist, true));
          // Pretty-print only when the beautify option was given on the command line.
          writer.WriteLine(_beautify ? JSON.Instance.Beautify(serialized) : serialized);
      }
  }
  /// <summary>
  /// Runs the marker matching for every input genome that still exists on disk.
  /// Genomes are processed one after another when there is a single genome or
  /// <paramref name="cores"/> is 1; otherwise each genome is queued on the thread pool
  /// and the method blocks until all queued work items have finished.
  /// </summary>
  /// <param name="ist">Typing object on which ReadAndGetMarkerMatchesForGenome is called.</param>
  /// <param name="inputGenomes">Paths of the genome files to analyse.</param>
  /// <param name="cores">Requested core count; 1 selects the sequential path.</param>
  private static void RunMIST(InSilicoTyping ist, List<string> inputGenomes, int cores)
  {
      if (inputGenomes.Count == 1 || cores == 1)
      {
          foreach (string inputGenome in inputGenomes)
          {
              if (InputGenomeExists(inputGenome))
              {
                  ist.ReadAndGetMarkerMatchesForGenome(inputGenome);
              }
          }
          return;
      }

      // The count starts at 1 for this thread so the event cannot reach zero while work
      // items are still being queued; CountdownEvent is disposable, hence the using block.
      using (var countdown = new CountdownEvent(1))
      {
          foreach (string inputGenome in inputGenomes)
          {
              if (!InputGenomeExists(inputGenome))
              {
                  continue;
              }

              countdown.AddCount();
              // Local copy so that each queued delegate captures its own genome path.
              string genome = inputGenome;
              ThreadPool.QueueUserWorkItem(delegate
                  {
                      try
                      {
                          ist.ReadAndGetMarkerMatchesForGenome(genome);
                      }
                      finally
                      {
                          // Signal even if the analysis throws, so the Wait below is never
                          // left waiting on a work item that did not report completion.
                          countdown.Signal();
                      }
                  });
          }

          // Release this thread's initial count, then wait for the queued work items.
          countdown.Signal();
          countdown.Wait();
      }
  }

  /// <summary>
  /// Returns whether the genome file exists, writing a warning to stderr when it does not.
  /// </summary>
  /// <param name="inputGenome">Path of the genome file to check.</param>
  /// <returns>True when the file exists; otherwise false.</returns>
  private static bool InputGenomeExists(string inputGenome)
  {
      if (File.Exists(inputGenome))
      {
          return true;
      }

      Console.Error.WriteLine(string.Format("Input genome {0} does not exist. MIST analysis not run on this genome", inputGenome));
      return false;
  }
  196. //private static void WriteVerboseTableOutput(InSilicoTyping ist, string verboseOutput)
  197. //{
  198. // using (var swMatchInfo = new StreamWriter(verboseOutput))
  199. // {
  200. // var headerItems = new List<string>
  201. // {
  202. // "strain",
  203. // "marker",
  204. // "test",
  205. // "result",
  206. // "contig_match",
  207. // "match_found",
  208. // "metadata",
  209. // "mismatches",
  210. // "percent_identity",
  211. // "aln_len",
  212. // "gaps",
  213. // "amplicon_len",
  214. // "expected_amplicon_len",
  215. // "subject_start_index",
  216. // "subject_end_index",
  217. // "query_blast_aln_seq",
  218. // "subject_blast_aln_seq",
  219. // "amplicon_seq"
  220. // };
  221. // swMatchInfo.WriteLine(string.Join("\t", headerItems));
  222. // var typingResults = new TypingResultsCollection(ist, true);
  223. // foreach (ContigCollection fastaFile in ist.MultiFastaFiles)
  224. // {
  225. // foreach (var pair in fastaFile.MarkerMatchesDict)
  226. // {
  227. // MarkerMatch match = pair.Value;
  228. // var metadata = GetMarkerMatchMetadata(typingResults, match);
  229. // string queryBlastAlnSeq;
  230. // string subjBlastAlnSeq;
  231. // GetBlastAlnSequences(match, out subjBlastAlnSeq, out queryBlastAlnSeq);
  232. // var line = new List<string>
  233. // {
  234. // match.StrainName,
  235. // match.MarkerName,
  236. // match.TestName,
  237. // match.MarkerCall,
  238. // match.ContigMatchName,
  239. // match.CorrectMarkerMatch.ToString(),
  240. // metadata,
  241. // match.Mismatches.ToString(),
  242. // match.BlastPercentIdentity.ToString(),
  243. // match.BlastAlignmentLength.ToString(),
  244. // match.BlastGaps.ToString(),
  245. // match.AmpliconSize.ToString(),
  246. // match.ExpectedAmpliconSize.ToString(),
  247. // match.StartIndex.ToString(),
  248. // match.EndIndex.ToString(),
  249. // queryBlastAlnSeq,
  250. // subjBlastAlnSeq,
  251. // match.Amplicon,
  252. // };
  253. // swMatchInfo.WriteLine(string.Join("\t", line));
  254. // line.Clear();
  255. // }
  256. // }
  257. // }
  258. //}
  /// <summary>
  /// Prints the usage banner and the description of every command-line option to standard output.
  /// </summary>
  /// <param name="p">Option set whose option descriptions are listed after the banner.</param>
  private static void ShowHelp(OptionSet p)
  {
      TextWriter stdout = Console.Out;
      var bannerLines = new[]
          {
              "Usage: MIST [OPTIONS]+ [Multifasta files]",
              "Generate in silico typing results for an organism using WGS data in multifasta format.",
              string.Empty,
              "Options:"
          };

      foreach (string bannerLine in bannerLines)
      {
          stdout.WriteLine(bannerLine);
      }

      p.WriteOptionDescriptions(stdout);
  }
  267. }
  268. }