PageRenderTime 54ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/BlastProcess.cs

https://bitbucket.org/peterk87/mist
C# | 170 lines | 129 code | 14 blank | 27 comment | 12 complexity | c2809de4c2e9570055e058bc397c35c5 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.IO;
  5. using System.Text.RegularExpressions;
  6. namespace MIST
  7. {
  /// <summary>
  /// Runs the external <c>blastn</c> executable against a subject database, feeding the query on
  /// standard input, and parses the tabular output file into <see cref="BlastOutput"/> objects.
  /// All of the work happens in the constructor; results are exposed through <see cref="BlastOutputs"/>.
  /// </summary>
  internal class BlastProcess
  {
      /// <summary>Number of tab-separated columns requested through <c>-outfmt</c> and required on every result line.</summary>
      private const int ExpectedColumnCount = 14;

      /// <summary>Regex to find an error or exception in the process error output.</summary>
      private static readonly Regex ErrorRegex = new Regex(@"[Ee]rror|[Ee]xception", RegexOptions.Compiled);

      /// <summary>Process error output string from the most recent BLAST run.</summary>
      private string _error = "";

      /// <summary>Parsed BLAST output.</summary>
      private readonly List<BlastOutput> _blastOutputs = new List<BlastOutput>();

      /// <summary>Object to prevent cross thread interference.</summary>
      private readonly object _thisLock = new object();

      /// <summary>Parsed BLAST output.</summary>
      public List<BlastOutput> BlastOutputs
      {
          get { return _blastOutputs; }
      }

      /// <summary>Run nucleotide BLAST using a query string (standard input) and subject database in a specified working directory.</summary>
      /// <remarks>
      /// BLAST is rerun while an error is detected (in its error output or while parsing its results),
      /// up to <c>Misc.BlastErrorRetryLimit</c> attempts. The temporary output file is removed afterwards.
      /// </remarks>
      /// <param name="workingDir">Working directory for running BLAST.</param>
      /// <param name="query">Query multifasta string (standard input).</param>
      /// <param name="subject">Subject filename.</param>
      /// <param name="testType">Test type used for determining how many results BLAST should return.</param>
      public BlastProcess(DirectoryInfo workingDir, string query, string subject, TestType testType)
      {
          string args = GetBlastArguments(testType);

          //name the output file something random determined by the OS filesystem
          //it's not like anyone really needs to check these files, they are temporary
          var outputFilename = Path.GetRandomFileName();

          //run blast using the multifasta file that has been formatted into a blast db as the subject and the allele file as the query
          var startInfo = new ProcessStartInfo(@"blastn",
              String.Format(
                  @" -db ""{0}"" -outfmt ""6 qseqid sseqid pident length qstart qend sstart send qlen bitscore gaps sseq qseq mismatch"" {1} -out {2}",
                  subject.Replace(' ', '_'),
                  args,
                  outputFilename
                  ))
          {
              WorkingDirectory = workingDir.FullName,
              RedirectStandardError = true,
              RedirectStandardInput = true,
              UseShellExecute = false,
              CreateNoWindow = true
          };
          var outputFilePath = Path.Combine(workingDir.FullName, outputFilename);

          try
          {
              bool errorCaught;
              int errorCount = 0;
              do
              {
                  _error = "";
                  RunProcess(startInfo, query);
                  //check if an error was produced while running BLAST
                  if (ErrorRegex.IsMatch(_error))
                  {
                      Console.Error.WriteLine("BLAST error:{0}", _error);
                      errorCaught = true;
                  }
                  else
                  {
                      errorCaught = ParseBlastOutput(outputFilePath);
                      if (errorCaught)
                      {
                          Console.Error.WriteLine("Error parsing BLAST output:\n File: '{0}'\n Subject Genome: '{1}'\n Query: '{2}'\n TestType: '{3}'",
                              outputFilePath,
                              subject,
                              query,
                              testType);
                      }
                  }
                  //if an error was produced by BLAST then rerun BLAST until the error goes away
                  errorCount++;
              } while (errorCaught && errorCount < Misc.BlastErrorRetryLimit);
          }
          finally
          {
              //the output filename is only known here, so nobody else can clean it up
              TryDeleteFile(outputFilePath);
          }
      }

      /// <summary>Get the extra blastn command-line arguments for a test type.</summary>
      /// <param name="testType">Test type being run.</param>
      /// <returns>Extra arguments; an empty string when the test type needs none.</returns>
      private static string GetBlastArguments(TestType testType)
      {
          switch (testType)
          {
              case TestType.OligoProbe:
              case TestType.SNP:
                  return "-num_alignments 1 -task blastn";
              case TestType.PCR:
              case TestType.Repeat:
                  return string.Format("-word_size {0}", Misc.BlastWordSize);
              default:
                  //AmpliconProbe, Allelic and any other test type use the blastn defaults
                  return "";
          }
      }

      /// <summary>Delete a file, ignoring I/O and permission failures (best-effort cleanup).</summary>
      /// <param name="path">Path of the file to delete.</param>
      private static void TryDeleteFile(string path)
      {
          try
          {
              File.Delete(path);
          }
          catch (IOException)
          {
              //a temporary file that cannot be removed must not fail the BLAST run
          }
          catch (UnauthorizedAccessException)
          {
              //same as above
          }
      }

      /// <summary>
      /// Run the process with the supplied process start info and standard input string.
      /// The standard error output of the process is stored in <c>_error</c>.
      /// </summary>
      /// <param name="startInfo">Start info; must redirect standard input and standard error.</param>
      /// <param name="input">Text written to the standard input of the process.</param>
      private void RunProcess(ProcessStartInfo startInfo, string input)
      {
          lock (_thisLock)
          {
              var errorText = new System.Text.StringBuilder();
              DataReceivedEventHandler onErrorData = (sender, e) =>
              {
                  //a null Data marks the end of the stream
                  if (e.Data != null)
                  {
                      errorText.AppendLine(e.Data);
                  }
              };

              Process process = null;
              try
              {
                  // Start the new process, retrying until one is actually started
                  bool started = false;
                  while (!started)
                  {
                      process = new Process { StartInfo = startInfo };
                      process.ErrorDataReceived += onErrorData;
                      started = process.Start();
                      if (!started)
                      {
                          process.Dispose();
                          process = null;
                      }
                  }

                  //drain standard error asynchronously from the start; waiting for exit before
                  //reading a redirected stream can deadlock once the pipe buffer fills up
                  process.BeginErrorReadLine();

                  //provide query input
                  process.StandardInput.Write(input);
                  process.StandardInput.Close();

                  //wait for the process to exit; this overload also waits for the
                  //asynchronous error stream to reach end of file
                  process.WaitForExit();
              }
              finally
              {
                  if (process != null)
                  {
                      process.Dispose();
                  }
              }

              _error = errorText.ToString();
          }
      }

      /// <summary>Parse the tab-delimited BLAST output file into <c>_blastOutputs</c>.</summary>
      /// <remarks>
      /// The list is cleared before parsing. When a line fails to parse, entries parsed
      /// before that line remain in the list.
      /// </remarks>
      /// <param name="outputFilename">Path of the BLAST output file.</param>
      /// <returns>
      /// True when an error occurred: the file is missing, a line has too few columns, a line could
      /// not be parsed, or no results were read at all. False when at least one result was parsed.
      /// </returns>
      private bool ParseBlastOutput(string outputFilename)
      {
          _blastOutputs.Clear();

          //report a missing output file like any other parsing error so that the caller can retry
          if (!File.Exists(outputFilename))
          {
              Console.Error.WriteLine("BLAST output file not found: '{0}'", outputFilename);
              return true;
          }

          bool blastOutputAdded = false;
          using (var sr = new StreamReader(outputFilename))
          {
              string line;
              //while there are lines that can be read
              while ((line = sr.ReadLine()) != null)
              {
                  //skip blank lines
                  if (string.IsNullOrEmpty(line))
                  {
                      continue;
                  }
                  //split the line based on tabs
                  string[] split = line.Split('\t');
                  if (split.Length < ExpectedColumnCount)
                  {
                      Console.Error.WriteLine("BLAST result incomplete. Only {0} of {1} expected columns of data.",
                          split.Length,
                          ExpectedColumnCount);
                      Console.Error.WriteLine(line);
                      return true;
                  }
                  //try to parse the line split into a BlastOutput object;
                  //a non-null result signals a parsing failure
                  var blastOutput = new BlastOutput();
                  var ex = blastOutput.ParseBlastResult(split);
                  if (ex != null)
                  {
                      Console.Error.WriteLine("BLAST result parsing error:\n{0}", line);
                      return true;
                  }
                  _blastOutputs.Add(blastOutput);
                  blastOutputAdded = true;
              }
          }
          //no results at all is treated as an error
          return !blastOutputAdded;
      }
  }
  156. }