/Program.cs
C# | 307 lines | 196 code | 39 blank | 72 comment | 26 complexity | 0ed99eb82d1fddbe774fad2e27f33afd MD5 | raw file
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.IO;
- using System.Linq;
- using System.Threading;
- using NDesk.Options;
- using fastJSON;
- namespace MIST
- {
/// <summary>
/// Command-line entry point for MIST: parses the options, validates the input files,
/// runs the in silico typing tests on every genome and writes the results as JSON.
/// </summary>
internal class Program
{
    /// <summary>Set by -b/--beautify; when true the JSON text is beautified before it is written.</summary>
    private static bool _beautify;

    /// <summary>Set by -A/--output-allele-sequences. Not read anywhere in this file.</summary>
    public static bool OutputAlleleSequences;

    /// <summary>
    /// Parses the command line, checks that the genome and test files exist, configures the
    /// shared <c>Misc</c> settings, runs the analysis and writes the JSON result to the file
    /// given by -j/--json-out (or standard output when it is "-").
    /// </summary>
    /// <param name="args">Options followed by the genome multifasta file paths.</param>
    private static void Main(string[] args)
    {
        bool showHelp = false;
        var testFiles = new List<string>();
        // string output = "-";
        var inputGenomes = new List<string>();

        // Defaults; each can be overridden by the matching command-line option below.
        Misc.BlastWordSize = 7;
        Misc.BlastErrorRetryLimit = 1;
        Misc.AlignmentLengthCoverage = 0.8;
        int cores = 1;
        var allelesFolders = new List<string>();
        Misc.TempDir = Path.GetTempPath();
        string jsonOutput = "-";
        var p = new OptionSet
        {
            {"t|test=", "Test info file. Typing test to perform.", s => testFiles.Add(s)},
            {"a|allele-dir=", "Alleles directory", s => allelesFolders.Add(s)},
            {"b|beautify", "Make your JSON beautiful!", s => _beautify = s != null},
            // {"o|results-out=", "Output filename for results. Default (-)/stdout.", s => output = s},
            {"l|alignment-length-coverage=", "Alignment length coverage threshold for amplicon probes (Default = 0.8)", (double s) => Misc.AlignmentLengthCoverage = s},
            {"w|wordsize=", "BLAST word size for PCR based tests. Default=7", (int s) => Misc.BlastWordSize = s},
            {"T|temp-dir=", "Directory to save temporary data (e.g. BLAST output and BLAST databases)", s => Misc.TempDir = s},
            {"c|cores=", "Number of cores to allow MIST to utilize (default=1)", (int s) => cores = s},
            {"e|blast-error-retries=", "Number of times MIST should retry running BLAST in case of output error.", (int s) => Misc.BlastErrorRetryLimit = s},
            {"j|json-out=", "json output filename. Default (-)/stdout.", s => jsonOutput = s},
            {"A|output-allele-sequences", "Output the allele sequences in the JSON output under TestMarkers", s => OutputAlleleSequences = s != null},
            {"h|help", "show this message and exit", s => showHelp = s != null},
        };

        // Every non-option argument is treated as a genome multifasta filename.
        try
        {
            List<string> genomeFilenames = p.Parse(args);

            // Answer a help request (or a bare invocation) before validating the inputs;
            // otherwise `MIST --help` is reported as a "no multifasta input" error.
            if (showHelp || args.Length == 0)
            {
                ShowHelp(p);
                return;
            }

            foreach (string genomeFilename in genomeFilenames)
            {
                if (File.Exists(genomeFilename))
                {
                    inputGenomes.Add(genomeFilename);
                }
                else
                {
                    // A missing genome is reported and skipped; it is only fatal when none remain.
                    Console.Error.WriteLine("Genome multifasta file '{0}' not found.", genomeFilename);
                }
            }
            if (inputGenomes.Count == 0)
            {
                throw new ArgumentException("No multifasta input(s) specified. MIST could not run.");
            }
        }
        catch (Exception e)
        {
            // Covers option parsing failures as well as the "no inputs" error raised above.
            Console.Error.Write("MIST argument exception: ");
            Console.Error.WriteLine(e.Message);
            Console.Error.WriteLine("Try `MIST --help' for more information.");
            ShowHelp(p);
            return;
        }

        if (cores > 1)
        {
            int minWorkerThreads;
            int minCompletionThreads;
            int maxWorkerThreads;
            int maxCompletionThreads;
            ThreadPool.GetMinThreads(out minWorkerThreads, out minCompletionThreads);
            ThreadPool.GetMaxThreads(out maxWorkerThreads, out maxCompletionThreads);
            // Only the worker-thread counts are changed; the completion-port counts are kept.
            // NOTE: SetMaxThreads rejects values below the processor count, so it cannot be
            // relied on to cap parallelism; RunMIST bounds the per-genome parallelism itself.
            ThreadPool.SetMinThreads(cores, minCompletionThreads);
            ThreadPool.SetMaxThreads(cores, maxCompletionThreads);
        }
        if (cores >= 1)
        {
            // Non-positive values leave Misc.Cores at whatever Misc initialised it to.
            Misc.Cores = cores;
        }

        // Only test info files that exist are passed on.
        var testFileInfo = new List<FileInfo>();
        foreach (string testFile in testFiles)
        {
            var fileInfo = new FileInfo(testFile);
            if (!fileInfo.Exists)
            {
                Console.Error.WriteLine("Test info file '{0}' not found!", testFile);
                continue;
            }
            testFileInfo.Add(fileInfo);
        }
        if (testFileInfo.Count == 0)
        {
            // Diagnostics go to stderr: stdout is the default destination of the JSON output.
            Console.Error.WriteLine("No valid in silico test specified. MIST could not run.");
            return;
        }

        var allelesDirInfos = allelesFolders.Select(folder => new DirectoryInfo(folder)).ToList();
        Stopwatch sw = Stopwatch.StartNew();

        // Get (and create if necessary) the temporary data directory.
        var tempDirInfo = new DirectoryInfo(Misc.TempDir);
        if (!tempDirInfo.Exists)
        {
            tempDirInfo.Create();
        }

        var ist = new InSilicoTyping(testFileInfo, allelesDirInfos, tempDirInfo);

        // For each test, look for a sibling file with the same name but a ".txt" extension.
        // Path.ChangeExtension only replaces the trailing extension, so a directory name that
        // happens to contain the extension text is left alone and an extension-less test file
        // is handled too.
        var extraTestInfos = (from pair in ist.TestNameTestInfoFile
                              let testName = pair.Key
                              let markersFileInfo = pair.Value
                              let extraTestInfoFile = new FileInfo(Path.ChangeExtension(markersFileInfo.FullName, ".txt"))
                              where extraTestInfoFile.Exists
                              select new ExtraTestInfo(ist.Markers, extraTestInfoFile, testName) into extraTestInfo
                              where extraTestInfo.Read()
                              select extraTestInfo).ToList();

        // Add each successfully read ExtraTestInfo object to the InSilicoTyping object.
        foreach (ExtraTestInfo extraTestInfo in extraTestInfos)
        {
            ist.AddExtraInfo(extraTestInfo);
        }

        ist.AddGenomeFilesToAnalysis(inputGenomes);
        RunMIST(ist, inputGenomes, cores);

        // TODO: make JSON output optional
        // WriteJSONOutput disposes the stream it is given.
        WriteJSONOutput(ist, jsonOutput == "-" ? Console.OpenStandardOutput() : new FileStream(jsonOutput, FileMode.Create));

        // TODO: make simple table summary output optional
        //ist.WriteResults(output, true);
        sw.Stop();
        Console.Error.WriteLine("MIST ran for {0} ms with {1} cores",
                                sw.ElapsedMilliseconds,
                                cores == -1 ? Environment.ProcessorCount : cores);
    }

    /// <summary>
    /// Serialises the typing results of <paramref name="ist"/> to JSON and writes the text,
    /// followed by a newline, to <paramref name="jsonOutputStream"/>.
    /// </summary>
    /// <param name="ist">Typing object whose results are written.</param>
    /// <param name="jsonOutputStream">Destination stream; it is disposed together with the writer.</param>
    private static void WriteJSONOutput(InSilicoTyping ist, Stream jsonOutputStream)
    {
        using (var writer = new StreamWriter(jsonOutputStream))
        {
            var typingResults = new TypingResultsCollection(ist, true);
            string jsonText = JSON.Instance.ToJSON(typingResults);
            if (_beautify)
            {
                jsonText = JSON.Instance.Beautify(jsonText);
            }
            writer.WriteLine(jsonText);
        }
    }

    /// <summary>
    /// Runs the marker matching for every genome file that exists, sequentially when there is
    /// a single genome or a single core and in parallel otherwise.
    /// </summary>
    /// <param name="ist">Typing object that performs the per-genome analysis.</param>
    /// <param name="inputGenomes">Paths of the genome multifasta files.</param>
    /// <param name="cores">
    /// Maximum number of genomes analysed concurrently when greater than 1; 1 forces the
    /// sequential path; any value below 1 places no explicit limit on the parallel path.
    /// </param>
    private static void RunMIST(InSilicoTyping ist, List<string> inputGenomes, int cores)
    {
        if (inputGenomes.Count == 1 || cores == 1)
        {
            foreach (string inputGenome in inputGenomes)
            {
                if (GenomeFileExists(inputGenome))
                {
                    ist.ReadAndGetMarkerMatchesForGenome(inputGenome);
                }
            }
            return;
        }

        List<string> existingGenomes = inputGenomes.Where(genome => GenomeFileExists(genome)).ToList();

        // Parallel.ForEach enforces the requested degree of parallelism itself (the thread pool
        // maximum cannot be lowered below the processor count), waits for all genomes, and
        // rethrows worker failures to the caller as an AggregateException.
        var parallelOptions = new System.Threading.Tasks.ParallelOptions
        {
            MaxDegreeOfParallelism = cores > 1 ? cores : -1
        };
        System.Threading.Tasks.Parallel.ForEach(existingGenomes, parallelOptions, genome =>
        {
            ist.ReadAndGetMarkerMatchesForGenome(genome);
        });
    }

    /// <summary>
    /// Checks that a genome file is present, reporting a skipped genome on standard error.
    /// </summary>
    /// <param name="inputGenome">Path of the genome multifasta file.</param>
    /// <returns>True when the file exists; otherwise false.</returns>
    private static bool GenomeFileExists(string inputGenome)
    {
        if (File.Exists(inputGenome))
        {
            return true;
        }
        Console.Error.WriteLine("Input genome {0} does not exist. MIST analysis not run on this genome", inputGenome);
        return false;
    }

    //private static void WriteVerboseTableOutput(InSilicoTyping ist, string verboseOutput)
    //{
    //    using (var swMatchInfo = new StreamWriter(verboseOutput))
    //    {
    //        var headerItems = new List<string>
    //                              {
    //                                  "strain",
    //                                  "marker",
    //                                  "test",
    //                                  "result",
    //                                  "contig_match",
    //                                  "match_found",
    //                                  "metadata",
    //                                  "mismatches",
    //                                  "percent_identity",
    //                                  "aln_len",
    //                                  "gaps",
    //                                  "amplicon_len",
    //                                  "expected_amplicon_len",
    //                                  "subject_start_index",
    //                                  "subject_end_index",
    //                                  "query_blast_aln_seq",
    //                                  "subject_blast_aln_seq",
    //                                  "amplicon_seq"
    //                              };
    //        swMatchInfo.WriteLine(string.Join("\t", headerItems));

    //        var typingResults = new TypingResultsCollection(ist, true);
    //        foreach (ContigCollection fastaFile in ist.MultiFastaFiles)
    //        {
    //            foreach (var pair in fastaFile.MarkerMatchesDict)
    //            {
    //                MarkerMatch match = pair.Value;

    //                var metadata = GetMarkerMatchMetadata(typingResults, match);
    //                string queryBlastAlnSeq;
    //                string subjBlastAlnSeq;
    //                GetBlastAlnSequences(match, out subjBlastAlnSeq, out queryBlastAlnSeq);

    //                var line = new List<string>
    //                               {
    //                                   match.StrainName,
    //                                   match.MarkerName,
    //                                   match.TestName,
    //                                   match.MarkerCall,
    //                                   match.ContigMatchName,
    //                                   match.CorrectMarkerMatch.ToString(),
    //                                   metadata,
    //                                   match.Mismatches.ToString(),
    //                                   match.BlastPercentIdentity.ToString(),
    //                                   match.BlastAlignmentLength.ToString(),
    //                                   match.BlastGaps.ToString(),
    //                                   match.AmpliconSize.ToString(),
    //                                   match.ExpectedAmpliconSize.ToString(),
    //                                   match.StartIndex.ToString(),
    //                                   match.EndIndex.ToString(),
    //                                   queryBlastAlnSeq,
    //                                   subjBlastAlnSeq,
    //                                   match.Amplicon,
    //                               };
    //                swMatchInfo.WriteLine(string.Join("\t", line));
    //                line.Clear();
    //            }
    //        }
    //    }
    //}

    /// <summary>
    /// Prints the usage banner followed by the option descriptions to standard output.
    /// </summary>
    /// <param name="p">Option set whose descriptions are listed.</param>
    private static void ShowHelp(OptionSet p)
    {
        Console.WriteLine("Usage: MIST [OPTIONS]+ [Multifasta files]");
        Console.WriteLine("Generate in silico typing results for an organism using WGS data in multifasta format.");
        Console.WriteLine();
        Console.WriteLine("Options:");
        p.WriteOptionDescriptions(Console.Out);
    }
}
- }