PageRenderTime 45ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/ExtraTestInfo.cs

https://bitbucket.org/peterk87/mist
C# | 313 lines | 274 code | 28 blank | 11 comment | 36 complexity | 46deea95db0e160834600c09f9ec601e MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. namespace MIST
  5. {
  /// <summary>
  /// Loads a tab-delimited "extra information" file for one test and looks up the rows whose
  /// marker columns agree with a set of marker matches.
  /// </summary>
  /// <remarks>
  /// The first line of the file is the header. A header column whose name equals the name of a
  /// marker belonging to this test is a marker column; every other column is an extra-information
  /// column. An empty cell in a marker column matches any result for that marker.
  /// </remarks>
  [Serializable]
  public class ExtraTestInfo
  {
    // NOTE(review): the class is [Serializable]; field-based serializers key on these private
    // field names, so renaming them may break previously serialized data - confirm before renaming.

    // Header name -> zero-based column index (first occurrence of the name).
    private readonly Dictionary<string, int> _fieldIndexDict = new Dictionary<string, int>();
    private readonly FileInfo _fileInfo;
    // Column index -> marker bound to that column.
    private readonly Dictionary<int, Marker> _indexMarkerDict = new Dictionary<int, Marker>();
    // Data rows (header excluded), each split on tab.
    private readonly List<string[]> _lines = new List<string[]>();
    // Marker -> column index holding that marker's values.
    private readonly Dictionary<Marker, int> _markerIndices = new Dictionary<Marker, int>();
    // Header name -> cell value -> indices into _lines of the rows holding that value.
    private readonly Dictionary<string, Dictionary<string, HashSet<int>>> _markerValueDict = new Dictionary<string, Dictionary<string, HashSet<int>>>();
    private readonly List<Marker> _markers;
    private string[] _fields;
    private string _testName;

    /// <summary>Creates a reader for the extra-information file of one test. Nothing is read until <see cref="Read"/> is called.</summary>
    /// <param name="markers">List of all Marker in the in silico typing test.</param>
    /// <param name="fileInfo">Extra information file information object.</param>
    /// <param name="testName">Test name corresponding to the extra information file and Marker in the Marker list.</param>
    public ExtraTestInfo(List<Marker> markers, FileInfo fileInfo, string testName)
    {
      _markers = markers;
      _fileInfo = fileInfo;
      _testName = testName;
    }

    /// <summary>File information object supplied to the constructor.</summary>
    public FileInfo FileInfo
    {
      get { return _fileInfo; }
    }

    /// <summary>Full path of the extra-information file.</summary>
    /// <remarks>
    /// The setter ignores the assigned value.
    /// NOTE(review): presumably kept so the property looks read-write to serializers/binders - confirm.
    /// </remarks>
    public string FilePath
    {
      get { return _fileInfo.FullName; }
      set { }
    }

    /// <summary>Name of the test whose markers are bound to columns of the file.</summary>
    public string TestName
    {
      get { return _testName; }
      set { _testName = value; }
    }

    /// <summary>Header fields of the file in column order; null until a header has been read or assigned.</summary>
    public string[] Fields
    {
      get { return _fields; }
      set { _fields = value; }
    }

    /// <summary>Header name to zero-based column index.</summary>
    public Dictionary<string, int> FieldIndexDict
    {
      get { return _fieldIndexDict; }
    }

    /// <summary>Column index to the marker bound to that column.</summary>
    public Dictionary<int, Marker> IndexMarkerDict
    {
      get { return _indexMarkerDict; }
    }

    /// <summary>Data rows of the file (header excluded), each split on tab.</summary>
    public List<string[]> Lines
    {
      get { return _lines; }
    }

    /// <summary>Marker name to column index, rebuilt as a new dictionary on every read of the property.</summary>
    /// <remarks>
    /// The setter ignores the assigned value.
    /// NOTE(review): presumably kept so the property looks read-write to serializers/binders - confirm.
    /// </remarks>
    public Dictionary<string, int> MarkerIndices
    {
      get
      {
        var indicesByName = new Dictionary<string, int>();
        foreach (KeyValuePair<Marker, int> entry in _markerIndices)
        {
          indicesByName.Add(entry.Key.Name, entry.Value);
        }
        return indicesByName;
      }
      set { }
    }

    /// <summary>Header name to (cell value to indices of the rows in <see cref="Lines"/> holding that value).</summary>
    public Dictionary<string, Dictionary<string, HashSet<int>>> MarkerValueDict
    {
      get { return _markerValueDict; }
    }

    /// <summary>Marker list supplied to the constructor.</summary>
    public List<Marker> Markers
    {
      get { return _markers; }
    }

    /// <summary>Gets the header names of the columns that are not bound to a marker.</summary>
    /// <returns>Extra-information header names in column order; empty when no header has been loaded.</returns>
    public List<string> GetExtraInfoHeaders()
    {
      var headers = new List<string>();
      foreach (int column in GetExtraInfoColumnIndices())
      {
        headers.Add(_fields[column]);
      }
      return headers;
    }

    /// <summary>
    /// Reads the file at <see cref="FilePath"/>: parses the header, binds the markers of
    /// <see cref="TestName"/> to the columns carrying their name, and indexes every row by cell value.
    /// </summary>
    /// <returns>
    /// true when the whole file was read; false when the file is empty, a row does not have the same
    /// number of columns as the header, or any exception occurs while reading. After a false return
    /// the object may be partially populated.
    /// </returns>
    public bool Read()
    {
      try
      {
        // Start from a clean slate so a repeated call neither duplicates rows nor fails on
        // keys that were added by an earlier call.
        _fieldIndexDict.Clear();
        _indexMarkerDict.Clear();
        _markerIndices.Clear();
        _markerValueDict.Clear();
        _lines.Clear();

        using (var reader = new StreamReader(FilePath))
        {
          string header = reader.ReadLine();
          if (header == null)
          {
            // Empty file: there is no header to bind markers to.
            return false;
          }
          _fields = header.Split('\t');

          // Record the real column position of every header name. The position must advance
          // for every column, including a repeated name, otherwise all columns after a
          // duplicate are recorded at the wrong index. The first occurrence of a name wins.
          for (int column = 0; column < _fields.Length; column++)
          {
            string field = _fields[column];
            if (_fieldIndexDict.ContainsKey(field))
            {
              continue;
            }
            _fieldIndexDict.Add(field, column);
            _markerValueDict.Add(field, new Dictionary<string, HashSet<int>>());
          }

          // Bind each marker of this test to the column that carries its name, if any.
          foreach (Marker marker in _markers)
          {
            if (marker.TestName != _testName)
            {
              continue;
            }
            int markerIndex;
            if (_fieldIndexDict.TryGetValue(marker.Name, out markerIndex))
            {
              _markerIndices.Add(marker, markerIndex);
              _indexMarkerDict.Add(markerIndex, marker);
            }
          }

          int lineCount = 0;
          string line;
          while ((line = reader.ReadLine()) != null)
          {
            string[] cells = line.Split('\t');
            if (cells.Length != _fields.Length)
            {
              // Malformed row: the file is rejected as a whole.
              return false;
            }
            _lines.Add(cells);

            // Index the row under the value of each of its cells.
            for (int column = 0; column < cells.Length; column++)
            {
              Dictionary<string, HashSet<int>> rowsByValue;
              if (!_markerValueDict.TryGetValue(_fields[column], out rowsByValue))
              {
                continue;
              }
              HashSet<int> rows;
              if (rowsByValue.TryGetValue(cells[column], out rows))
              {
                rows.Add(lineCount);
              }
              else
              {
                rowsByValue.Add(cells[column], new HashSet<int> {lineCount});
              }
            }
            lineCount++;
          }
        }
      }
      catch (Exception)
      {
        // Best-effort contract: every failure (missing file, I/O error, ...) is reported as false.
        return false;
      }
      return true;
    }

    /// <summary>Get only the matching extra information; none of the marker information.</summary>
    /// <param name="markerMatches">List of matches to look for matching extra information with.</param>
    /// <param name="fuzzyMatching">
    /// When true, the value compared against the file is the first match of <c>Misc.NumberRegex</c>
    /// in the match's <c>AlleleMatch</c>; otherwise it is the match's <c>MarkerCall</c>.
    /// </param>
    /// <returns>
    /// One entry per extra-information column, in column order: the distinct non-empty values of
    /// that column over the rows compatible with every match, joined with " ; ". An empty list is
    /// returned as soon as a match has a value the file does not list for a marker column that has
    /// no empty (match-anything) cell.
    /// </returns>
    public List<string> GetExtraInfo(List<MarkerMatch> markerMatches, bool fuzzyMatching)
    {
      var list = new List<string>();
      List<int> extraInfoIndices = GetExtraInfoColumnIndices();
      HashSet<int> candidateLines = AllLineIndices();

      // NOTE(review): unlike GetExtraInfoDict, this method neither skips markers that are not
      // bound to a column nor restricts fuzzy matching to TestType.Allelic markers - confirm
      // whether the two methods are meant to agree.
      foreach (MarkerMatch match in markerMatches)
      {
        Marker marker = match.Marker;
        Dictionary<string, HashSet<int>> markerValues;
        if (!_markerValueDict.TryGetValue(marker.Name, out markerValues))
        {
          continue;
        }
        string result = fuzzyMatching
          ? Misc.NumberRegex.Match(match.AlleleMatch).Value
          : match.MarkerCall;
        if (!NarrowToCompatibleLines(candidateLines, markerValues, result))
        {
          // NOTE(review): an empty list is returned here, whereas an empty candidate set below
          // yields one empty string per column - kept as-is for callers; confirm it is intended.
          return list;
        }
      }

      foreach (int extraInfoIndex in extraInfoIndices)
      {
        // Collect distinct non-empty values in first-seen order.
        var seen = new HashSet<string>();
        var distinctValues = new List<string>();
        foreach (int lineIndex in candidateLines)
        {
          string value = _lines[lineIndex][extraInfoIndex];
          if (value != "" && seen.Add(value))
          {
            distinctValues.Add(value);
          }
        }
        list.Add(string.Join(" ; ", distinctValues.ToArray()));
      }
      return list;
    }

    /// <summary>Get only the matching extra information; none of the marker information.</summary>
    /// <param name="markerMatches">List of matches to look for matching extra information with.</param>
    /// <param name="fuzzyMatching">
    /// When true and the marker's typing test is <c>TestType.Allelic</c>, the value compared against
    /// the file is the first match of <c>Misc.NumberRegex</c> in the match's <c>AlleleMatch</c>;
    /// otherwise it is the match's <c>MarkerCall</c>.
    /// </param>
    /// <returns>
    /// One dictionary (extra-information header name to cell value) per row compatible with every
    /// match whose marker is bound to a column. When no row is compatible, a single dictionary
    /// mapping every extra-information header to an empty string is returned.
    /// </returns>
    public List<Dictionary<string, string>> GetExtraInfoDict(List<MarkerMatch> markerMatches, bool fuzzyMatching)
    {
      var list = new List<Dictionary<string, string>>();
      List<int> extraInfoIndices = GetExtraInfoColumnIndices();
      HashSet<int> candidateLines = AllLineIndices();

      foreach (MarkerMatch match in markerMatches)
      {
        Marker marker = match.Marker;
        if (!_markerIndices.ContainsKey(marker))
        {
          continue;
        }
        Dictionary<string, HashSet<int>> markerValues;
        if (!_markerValueDict.TryGetValue(marker.Name, out markerValues))
        {
          continue;
        }
        string result = (fuzzyMatching && marker.TypingTest == TestType.Allelic)
          ? Misc.NumberRegex.Match(match.AlleleMatch).Value
          : match.MarkerCall;
        if (!NarrowToCompatibleLines(candidateLines, markerValues, result))
        {
          list.Add(BuildBlankExtraInfoRow(extraInfoIndices));
          return list;
        }
      }

      foreach (int lineIndex in candidateLines)
      {
        var extraInfo = new Dictionary<string, string>();
        foreach (int extraInfoIndex in extraInfoIndices)
        {
          extraInfo.Add(_fields[extraInfoIndex], _lines[lineIndex][extraInfoIndex]);
        }
        list.Add(extraInfo);
      }
      if (list.Count == 0)
      {
        list.Add(BuildBlankExtraInfoRow(extraInfoIndices));
      }
      return list;
    }

    /// <summary>Column indices, in ascending order, that are not bound to a marker; empty when no header is loaded.</summary>
    private List<int> GetExtraInfoColumnIndices()
    {
      var indices = new List<int>();
      if (_fields == null)
      {
        return indices;
      }
      for (int column = 0; column < _fields.Length; column++)
      {
        if (!_indexMarkerDict.ContainsKey(column))
        {
          indices.Add(column);
        }
      }
      return indices;
    }

    /// <summary>Creates a set holding the index of every loaded row.</summary>
    private HashSet<int> AllLineIndices()
    {
      var all = new HashSet<int>();
      for (int lineIndex = 0; lineIndex < _lines.Count; lineIndex++)
      {
        all.Add(lineIndex);
      }
      return all;
    }

    /// <summary>
    /// Restricts <paramref name="candidates"/> to the rows whose cell for one marker equals
    /// <paramref name="result"/> or is empty (an empty cell matches any result).
    /// </summary>
    /// <returns>
    /// false when the marker column neither lists <paramref name="result"/> nor has an empty cell,
    /// in which case <paramref name="candidates"/> is cleared; true otherwise.
    /// </returns>
    private static bool NarrowToCompatibleLines(HashSet<int> candidates, Dictionary<string, HashSet<int>> markerValues, string result)
    {
      HashSet<int> wildcardLines;
      bool hasWildcard = markerValues.TryGetValue("", out wildcardLines);

      HashSet<int> exactLines;
      // A null result cannot be used as a dictionary key; treat it as "value not listed".
      if (result != null && markerValues.TryGetValue(result, out exactLines))
      {
        if (hasWildcard)
        {
          // Union into a copy: the sets inside markerValues are the shared index built by
          // Read() and exposed through MarkerValueDict, so a query must not modify them.
          var allowed = new HashSet<int>(exactLines);
          allowed.UnionWith(wildcardLines);
          candidates.IntersectWith(allowed);
        }
        else
        {
          candidates.IntersectWith(exactLines);
        }
        return true;
      }

      if (hasWildcard)
      {
        candidates.IntersectWith(wildcardLines);
        return true;
      }

      candidates.Clear();
      return false;
    }

    /// <summary>Builds a row mapping every extra-information header to an empty string.</summary>
    private Dictionary<string, string> BuildBlankExtraInfoRow(List<int> extraInfoIndices)
    {
      var blank = new Dictionary<string, string>();
      foreach (int extraInfoIndex in extraInfoIndices)
      {
        blank.Add(_fields[extraInfoIndex], "");
      }
      return blank;
    }
  }
  285. }