PageRenderTime 44ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/Misc.cs

https://bitbucket.org/peterk87/mist
C# | 315 lines | 258 code | 26 blank | 31 comment | 15 complexity | 91c914f5ef717a806ceea7cfad3b76e9 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.IO;
  5. using System.Text.RegularExpressions;
  6. namespace MIST
  7. {
  8. public static class Misc
  9. {
  /// <summary>Shared, compiled regular expression matching a run of one or more digits.</summary>
  public static readonly Regex NumberRegex = new Regex(@"\d+", RegexOptions.Compiled);

  /// <summary>Monitor object that <see cref="GetAmplicon"/> locks on while extracting an amplicon.</summary>
  private static readonly object ThisLock = new object();

  /// <summary>BLAST word size.</summary>
  /// <remarks>NOTE(review): the original comment cites a default of 11, but no initializer is declared
  /// here, so the value is 0 until some caller assigns it - confirm where it is set.</remarks>
  public static int BlastWordSize { get; set; }

  /// <summary>Temporary directory setting (presumably a path; verify against callers).</summary>
  public static string TempDir { get; set; }

  /// <summary>Core count setting (presumably the number of CPU cores to use; verify against callers).</summary>
  public static int Cores { get; set; }

  /// <summary>Retry limit setting for BLAST errors (not read anywhere in this class; verify against callers).</summary>
  public static int BlastErrorRetryLimit { get; set; }

  /// <summary>Alignment length coverage setting (units and valid range are not shown here; verify against callers).</summary>
  public static double AlignmentLengthCoverage { get; set; }
  /// <summary>Check whether a nucleotide sequence contains any character other than the four plain bases.</summary>
  /// <param name="sequence">Sequence to inspect. The comparison is case-sensitive, so lowercase bases count as "other".</param>
  /// <returns>True if at least one character is not 'A', 'C', 'G' or 'T'; false otherwise, including for an empty string.</returns>
  public static bool IsDegenSequence(string sequence)
  {
      for (int i = 0; i < sequence.Length; i++)
      {
          char b = sequence[i];
          bool isPlainBase = b == 'A' || b == 'C' || b == 'G' || b == 'T';
          if (!isPlainBase)
          {
              // first non-ACGT character settles the answer
              return true;
          }
      }
      return false;
  }
  /// <summary>Recursively expand a degenerate nucleotide sequence into every non-degenerate sequence it can represent.
  /// Nothing is returned; each completed sequence is appended to <paramref name="list"/>.</summary>
  /// <param name="array">Scratch buffer that holds the sequence currently being built; position <paramref name="index"/> is overwritten at each level.</param>
  /// <param name="seq">Degenerate nucleotide sequence (uppercase IUPAC codes).</param>
  /// <param name="index">Position in <paramref name="seq"/> to expand next; callers start at 0.</param>
  /// <param name="list">Receives the expanded sequences.</param>
  /// <remarks>A character with no expansion below (anything other than the listed uppercase codes) produces
  /// no candidates, so no sequence is emitted for any path through that position.</remarks>
  public static void ExpandDegenSequence(char[] array, string seq, int index, List<string> list)
  {
      if (index == seq.Length)
      {
          // every position has been filled in: snapshot the buffer
          list.Add(new string(array));
          return;
      }

      // candidate bases for this position, in the order they are emitted
      string choices;
      switch (seq[index])
      {
          case 'A': choices = "A"; break;
          case 'T': choices = "T"; break;
          case 'G': choices = "G"; break;
          case 'C': choices = "C"; break;
          case 'R': choices = "AG"; break;   // A or G
          case 'Y': choices = "CT"; break;   // C or T
          case 'M': choices = "AC"; break;   // A or C
          case 'S': choices = "GC"; break;   // G or C
          case 'W': choices = "AT"; break;   // A or T
          case 'K': choices = "GT"; break;   // G or T
          case 'V': choices = "ACG"; break;  // not T
          case 'D': choices = "AGT"; break;  // not C
          case 'H': choices = "ACT"; break;  // not G
          case 'B': choices = "CGT"; break;  // not A
          case 'N': choices = "ACGT"; break; // any base
          default: choices = string.Empty; break;
      }

      foreach (char candidate in choices)
      {
          array[index] = candidate;
          ExpandDegenSequence(array, seq, index + 1, list);
      }
  }
  /// <summary>Count the positions at which two strings hold different characters.</summary>
  /// <param name="s1">First string.</param>
  /// <param name="s2">Second string.</param>
  /// <returns>Number of mismatching positions within the first <c>min(s1.Length, s2.Length)</c> characters.</returns>
  /// <remarks>The strings are intended to be the same length. When they are not, only the overlap is
  /// compared and the extra characters of the longer string are not counted as differences.</remarks>
  public static int GetDifferences(string s1, string s2)
  {
      int overlap = Math.Min(s1.Length, s2.Length);
      int mismatches = 0;
      for (int pos = 0; pos < overlap; pos++)
      {
          mismatches += s1[pos] == s2[pos] ? 0 : 1;
      }
      return mismatches;
  }
  /// <summary>Read a multifasta file and write a simplified copy into a temporary directory.</summary>
  /// <param name="multifasta">Source multifasta file.</param>
  /// <param name="tmpDir">Directory that receives the copy.</param>
  /// <param name="fastaEntries">Set to the number of contigs read from the source, including any with an
  /// empty sequence that are not written to the copy.</param>
  /// <returns>The written file: same name as the source with spaces replaced by underscores, located in <paramref name="tmpDir"/>.</returns>
  /// <remarks>Each written record uses the contig's <c>Index</c> as its FASTA header line rather than the original header.</remarks>
  public static FileInfo WriteTempMultifastaFile(FileInfo multifasta, DirectoryInfo tmpDir, out int fastaEntries)
  {
      var source = new ContigCollection(multifasta.FullName);
      source.Read();
      fastaEntries = source.Contigs.Count;

      string targetDir = tmpDir.FullName;
      string safeName = multifasta.Name.Replace(' ', '_');
      string outputPath = Path.Combine(targetDir, safeName);

      using (var writer = new StreamWriter(outputPath))
      {
          foreach (Contig entry in source.Contigs)
          {
              // contigs without sequence are left out of the copy
              if (!(entry.Sequence.Length > 0))
              {
                  continue;
              }
              writer.Write(">");
              writer.WriteLine(entry.Index);
              writer.WriteLine(entry.Sequence);
          }
      }
      return new FileInfo(outputPath);
  }
  /// <summary>Widen a subject hit range by the parts of the query that fall outside the aligned region,
  /// and shift both subject indices down by one so they can be used as zero-based array indices.</summary>
  /// <param name="length">Total length the query range is measured against (presumably the full query length; verify against callers).</param>
  /// <param name="queryEndIndex">End of the aligned region on the query; read only, never modified here.</param>
  /// <param name="queryStartIndex">Start of the aligned region on the query (treated as 1-based); read only, never modified here.</param>
  /// <param name="reverseComplement">True when the hit is on the reverse strand, in which case the query overhangs are applied to the opposite subject ends.</param>
  /// <param name="subjectEndIndex">Subject end index; adjusted in place.</param>
  /// <param name="subjectStartIndex">Subject start index; adjusted in place.</param>
  public static void AdjustSubjectIndices(int length, ref int queryEndIndex, ref int queryStartIndex, bool reverseComplement, ref int subjectEndIndex, ref int subjectStartIndex)
  {
      // unaligned query bases after and before the aligned region
      int trailingOverhang = length - queryEndIndex;
      int leadingOverhang = queryStartIndex - 1;

      if (!reverseComplement)
      {
          // forward strand: leading overhang extends the start, trailing overhang extends the end;
          // the extra -1 converts to zero-based indices
          subjectStartIndex = subjectStartIndex - leadingOverhang - 1;
          subjectEndIndex = subjectEndIndex + trailingOverhang - 1;
          return;
      }

      // reverse strand: the overhangs swap ends; the -1 converts to zero-based indices
      subjectEndIndex = subjectEndIndex - 1 + leadingOverhang;
      subjectStartIndex = subjectStartIndex - 1 - trailingOverhang;
  }
  /// <summary>Extract the amplicon between two zero-based, inclusive indices of a contig, clipping the
  /// range to the contig when it runs past either end.</summary>
  /// <param name="startIndex">Zero-based index of the first base; a negative value means the amplicon starts before the contig does.</param>
  /// <param name="endIndex">Zero-based index of the last base (inclusive); a value at or beyond the contig length means the amplicon ends after the contig does.</param>
  /// <param name="reverseComplement">True to return the reverse complement of the extracted region; otherwise the region is returned uppercased.</param>
  /// <param name="c">Contig to extract from.</param>
  /// <param name="amplicon">Receives the extracted sequence. If extraction fails with an exception the value may be left unchanged.</param>
  /// <returns>True if the amplicon was clipped at a contig end, or if any exception occurred during
  /// extraction (exceptions are swallowed); false if the full requested range was extracted.</returns>
  /// <remarks>The whole operation runs under a class-wide lock, so concurrent calls are serialized.</remarks>
  public static bool GetAmplicon(int startIndex, int endIndex, bool reverseComplement, Contig c, ref string amplicon)
  {
      lock (ThisLock)
      {
          try
          {
              var isContigTruncation = false;
              int first = startIndex;
              int last = endIndex;

              if (first < 0)
              {
                  // amplicon starts before the contig: clip to the first base
                  first = 0;
                  isContigTruncation = true;
              }
              if (last >= c.Sequence.Length)
              {
                  // amplicon ends after the contig: clip to the last base. This uses the same bound for
                  // both start cases; previously a negative start with endIndex == Sequence.Length
                  // slipped past the check, threw inside Substring and left amplicon unassigned.
                  last = c.Sequence.Length - 1;
                  isContigTruncation = true;
              }

              amplicon = c.Sequence.Substring(first, last - first + 1);
              amplicon = reverseComplement ? ReverseComplement(amplicon) : amplicon.ToUpper();

              if (isContigTruncation)
              {
                  Console.Error.WriteLine(string.Format("Amplicon contig truncation detected with contig {0} of multifasta {1}", c.Header, c.MultifastaFile.Name));
              }
              return isContigTruncation;
          }
          catch (Exception)
          {
              // best effort: any failure (e.g. a range entirely outside the contig) is reported
              // to the caller the same way as a truncation
              return true;
          }
      }
  }
  /// <summary>Run <c>makeblastdb</c> (nucleotide database) on a multifasta file and wait for it to finish.</summary>
  /// <param name="workingDir">Directory the process runs in; the input file name is resolved relative to it.</param>
  /// <param name="fiMultifasta">Genome multifasta file. Only its name is used, with spaces replaced by underscores,
  /// so a file of that name is expected to exist in <paramref name="workingDir"/>.</param>
  /// <remarks>The child's standard output and standard error are redirected and discarded, and its exit
  /// code is not inspected. <c>makeblastdb</c> must be resolvable from the process search path.</remarks>
  public static void MakeBlastDB(DirectoryInfo workingDir, FileInfo fiMultifasta)
  {
      var startInfo = new ProcessStartInfo(@"makeblastdb",
          String.Format(@"-in ""{0}"" -dbtype nucl",
              fiMultifasta.Name.Replace(' ', '_')))
      {
          WorkingDirectory = workingDir.FullName,
          RedirectStandardError = true,
          RedirectStandardOutput = true,
          UseShellExecute = false,
          CreateNoWindow = true
      };

      var p = new Process { StartInfo = startInfo };
      try
      {
          while (!p.Start())
          {
              // Start() reported that no new process was launched: release this instance before retrying
              p.Dispose();
              p = new Process { StartInfo = startInfo };
          }

          // Both streams are redirected, so they must be drained: if nobody reads them the child blocks
          // once a pipe buffer fills and WaitForExit() would never return. Asynchronous reads with no
          // handlers attached simply discard the output.
          p.BeginOutputReadLine();
          p.BeginErrorReadLine();

          p.WaitForExit();
      }
      finally
      {
          // release the process handle and pipes even if starting or waiting throws
          p.Dispose();
      }
  }
  /// <summary>Build the reverse complement of a nucleotide sequence.</summary>
  /// <param name="str">Nucleotide sequence.</param>
  /// <returns>The sequence reversed and then complemented; the result is uppercase because the complement step uppercases its input.</returns>
  public static string ReverseComplement(string str)
  {
      return Complement(Reverse(str));
  }
  /// <summary>Return the characters of a string in reverse order.</summary>
  /// <param name="str">String to reverse.</param>
  /// <returns>A new string with the character order reversed.</returns>
  private static string Reverse(string str)
  {
      int n = str.Length;
      var flipped = new char[n];
      for (int i = 0; i < n; i++)
      {
          // mirror each position around the centre
          flipped[i] = str[n - 1 - i];
      }
      return new string(flipped);
  }
  /// <summary>Return the uppercase complement of a nucleotide sequence, position by position (no reversal).</summary>
  /// <param name="str">Nucleotide sequence in any case.</param>
  /// <returns>Uppercased sequence in which every base, including IUPAC ambiguity codes, is replaced by its
  /// complement. Codes that are their own complement (S, W, N) and any unrecognised characters are left as they are.</returns>
  /// <remarks>Ambiguity codes are complemented as well (R/Y, K/M, B/V, D/H) so that reverse complements of
  /// degenerate sequences stay correct; previously only A, C, G and T were swapped.</remarks>
  private static string Complement(string str)
  {
      char[] ch = str.ToUpper().ToCharArray();
      for (int i = 0; i < ch.Length; i++)
      {
          switch (ch[i])
          {
              case 'A': ch[i] = 'T'; break;
              case 'T': ch[i] = 'A'; break;
              case 'G': ch[i] = 'C'; break;
              case 'C': ch[i] = 'G'; break;
              // two-base ambiguity codes
              case 'R': ch[i] = 'Y'; break; // A/G -> T/C
              case 'Y': ch[i] = 'R'; break; // C/T -> G/A
              case 'K': ch[i] = 'M'; break; // G/T -> C/A
              case 'M': ch[i] = 'K'; break; // A/C -> T/G
              // three-base ambiguity codes
              case 'B': ch[i] = 'V'; break; // not A -> not T
              case 'V': ch[i] = 'B'; break; // not T -> not A
              case 'D': ch[i] = 'H'; break; // not C -> not G
              case 'H': ch[i] = 'D'; break; // not G -> not C
          }
      }
      return new string(ch);
  }
  288. }
  289. }