/ExtraTestInfo.cs
https://bitbucket.org/peterk87/mist · C# · 313 lines · 274 code · 28 blank · 11 comment · 36 complexity · 46deea95db0e160834600c09f9ec601e MD5 · raw file
- using System;
- using System.Collections.Generic;
- using System.IO;
namespace MIST
{
    /// <summary>
    /// Loads a tab-delimited "extra information" file for one typing test and looks up the rows of that
    /// file that are consistent with a set of marker matches.
    /// </summary>
    /// <remarks>
    /// The first line of the file is the header row. A column whose header equals the name of a
    /// <see cref="Marker"/> belonging to this test is a marker column; every other column is an
    /// extra-information column. An empty cell in a marker column is treated as a wildcard that is
    /// compatible with any result for that marker.
    /// </remarks>
    [Serializable]
    public class ExtraTestInfo
    {
        /// <summary>Header name -> zero-based column index (first occurrence of the name wins).</summary>
        private readonly Dictionary<string, int> _fieldIndexDict = new Dictionary<string, int>();

        /// <summary>The extra information file.</summary>
        private readonly FileInfo _fileInfo;

        /// <summary>Column index -> marker, for the columns whose header matches a marker of this test.</summary>
        private readonly Dictionary<int, Marker> _indexMarkerDict = new Dictionary<int, Marker>();

        /// <summary>Data rows of the file (header excluded), each split on tab.</summary>
        private readonly List<string[]> _lines = new List<string[]>();

        /// <summary>Marker -> column index; the inverse of <see cref="_indexMarkerDict"/>.</summary>
        private readonly Dictionary<Marker, int> _markerIndices = new Dictionary<Marker, int>();

        /// <summary>
        /// Header name -> (cell value -> indices into <see cref="_lines"/> of the rows holding that value).
        /// Populated for every column, not only the marker columns.
        /// </summary>
        private readonly Dictionary<string, Dictionary<string, HashSet<int>>> _markerValueDict =
            new Dictionary<string, Dictionary<string, HashSet<int>>>();

        /// <summary>All markers handed to the constructor (may include markers of other tests).</summary>
        private readonly List<Marker> _markers;

        /// <summary>Header row split on tab; null until <see cref="Read"/> has parsed a header.</summary>
        private string[] _fields;

        /// <summary>Name of the test whose markers are looked up in the header row.</summary>
        private string _testName;

        /// <summary>Creates an instance bound to a marker list, a file and a test name. Nothing is read until <see cref="Read"/> is called.</summary>
        /// <param name="markers">List of all Marker in the in silico typing test.</param>
        /// <param name="fileInfo">Extra information file information object.</param>
        /// <param name="testName">Test name corresponding to the extra information file and Marker in the Marker list.</param>
        public ExtraTestInfo(List<Marker> markers, FileInfo fileInfo, string testName)
        {
            _markers = markers;
            _fileInfo = fileInfo;
            _testName = testName;
        }

        /// <summary>The extra information file.</summary>
        public FileInfo FileInfo { get { return _fileInfo; } }

        /// <summary>Full path of the extra information file. The setter is a no-op and is kept only so the property's public shape does not change.</summary>
        public string FilePath { get { return _fileInfo.FullName; } set { } }

        /// <summary>Name of the test this file belongs to.</summary>
        public string TestName { get { return _testName; } set { _testName = value; } }

        /// <summary>Header row of the file; null until <see cref="Read"/> has parsed a header.</summary>
        public string[] Fields { get { return _fields; } set { _fields = value; } }

        /// <summary>Header name -> zero-based column index.</summary>
        public Dictionary<string, int> FieldIndexDict { get { return _fieldIndexDict; } }

        /// <summary>Column index -> marker, for marker columns.</summary>
        public Dictionary<int, Marker> IndexMarkerDict { get { return _indexMarkerDict; } }

        /// <summary>Data rows of the file, each split on tab.</summary>
        public List<string[]> Lines { get { return _lines; } }

        /// <summary>
        /// Marker name -> column index. A new dictionary is built on every access.
        /// The setter is a no-op and is kept only so the property's public shape does not change.
        /// </summary>
        public Dictionary<string, int> MarkerIndices
        {
            get
            {
                var byName = new Dictionary<string, int>();
                foreach (KeyValuePair<Marker, int> entry in _markerIndices)
                {
                    byName.Add(entry.Key.Name, entry.Value);
                }
                return byName;
            }
            set { }
        }

        /// <summary>Header name -> (cell value -> row indices holding that value).</summary>
        public Dictionary<string, Dictionary<string, HashSet<int>>> MarkerValueDict { get { return _markerValueDict; } }

        /// <summary>All markers handed to the constructor.</summary>
        public List<Marker> Markers { get { return _markers; } }

        /// <summary>Gets the headers of the extra-information (non-marker) columns, in file order.</summary>
        /// <returns>The headers; empty when no header row has been read.</returns>
        public List<string> GetExtraInfoHeaders()
        {
            var headers = new List<string>();
            foreach (int column in GetExtraInfoIndices())
            {
                headers.Add(_fields[column]);
            }
            return headers;
        }

        /// <summary>
        /// Reads and indexes the extra information file. Any previously parsed content is discarded once
        /// the header row of the file has been read, so the method can be called more than once.
        /// </summary>
        /// <returns>
        /// true when the whole file was parsed; false when the file is empty, cannot be read, a data row
        /// does not have the same number of columns as the header, or indexing fails. After a false
        /// return the instance may hold the rows parsed before the failure.
        /// </returns>
        public bool Read()
        {
            try
            {
                using (var reader = new StreamReader(FilePath))
                {
                    string headerLine = reader.ReadLine();
                    if (headerLine == null)
                    {
                        // Empty file: there is no header row to index.
                        return false;
                    }

                    ResetParsedState();
                    _fields = headerLine.Split('\t');
                    IndexHeaderColumns();
                    IndexMarkerColumns();

                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        string[] cells = line.Split('\t');
                        if (cells.Length != _fields.Length)
                        {
                            // Ragged row: the file is not a rectangular table.
                            return false;
                        }
                        AddRow(cells);
                    }
                }
            }
            catch (Exception)
            {
                // The method's contract is a success flag: I/O and indexing failures are reported as false.
                return false;
            }
            return true;
        }

        /// <summary>Get only the matching extra information; none of the marker information.</summary>
        /// <remarks>
        /// NOTE(review): unlike <see cref="GetExtraInfoDict"/>, this method does not skip markers that are
        /// not marker columns of this test, applies fuzzy matching regardless of the marker's typing test,
        /// and returns an empty list (rather than one empty string per column) when a marker rules out
        /// every row. These differences are preserved as found - confirm whether they are intended.
        /// </remarks>
        /// <param name="markerMatches">List of matches to look for matching extra information with.</param>
        /// <param name="fuzzyMatching">
        /// When true, the value looked up for a marker is the first match of <c>Misc.NumberRegex</c> in the
        /// match's <c>AlleleMatch</c>; otherwise it is the match's <c>MarkerCall</c>.
        /// </param>
        /// <returns>
        /// One string per extra-information column, in file order, holding the distinct non-empty values of
        /// that column over the compatible rows joined with " ; ".
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="markerMatches"/> is null.</exception>
        public List<string> GetExtraInfo(List<MarkerMatch> markerMatches, bool fuzzyMatching)
        {
            if (markerMatches == null)
            {
                throw new ArgumentNullException("markerMatches");
            }

            var list = new List<string>();
            HashSet<int> rows = FindCompatibleRows(markerMatches, fuzzyMatching, false, false);
            if (rows == null)
            {
                return list;
            }

            foreach (int column in GetExtraInfoIndices())
            {
                var distinctValues = new HashSet<string>();
                foreach (int row in rows)
                {
                    string value = _lines[row][column];
                    if (value != "")
                    {
                        distinctValues.Add(value);
                    }
                }
                list.Add(string.Join(" ; ", distinctValues));
            }
            return list;
        }

        /// <summary>Get only the matching extra information; none of the marker information.</summary>
        /// <param name="markerMatches">List of matches to look for matching extra information with.</param>
        /// <param name="fuzzyMatching">
        /// When true and the marker's typing test is <c>TestType.Allelic</c>, the value looked up for a
        /// marker is the first match of <c>Misc.NumberRegex</c> in the match's <c>AlleleMatch</c>;
        /// otherwise it is the match's <c>MarkerCall</c>.
        /// </param>
        /// <returns>
        /// One dictionary (extra-information header -> cell value) per compatible row. When no row is
        /// compatible, a single dictionary mapping every extra-information header to an empty string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="markerMatches"/> is null.</exception>
        public List<Dictionary<string, string>> GetExtraInfoDict(List<MarkerMatch> markerMatches, bool fuzzyMatching)
        {
            if (markerMatches == null)
            {
                throw new ArgumentNullException("markerMatches");
            }

            var list = new List<Dictionary<string, string>>();
            List<int> extraColumns = GetExtraInfoIndices();
            HashSet<int> rows = FindCompatibleRows(markerMatches, fuzzyMatching, true, true);
            if (rows != null)
            {
                foreach (int row in rows)
                {
                    list.Add(BuildExtraInfoRow(extraColumns, _lines[row]));
                }
            }
            if (list.Count == 0)
            {
                list.Add(BuildExtraInfoRow(extraColumns, null));
            }
            return list;
        }

        /// <summary>Clears everything a previous <see cref="Read"/> call stored.</summary>
        private void ResetParsedState()
        {
            _fieldIndexDict.Clear();
            _indexMarkerDict.Clear();
            _markerIndices.Clear();
            _markerValueDict.Clear();
            _lines.Clear();
        }

        /// <summary>Records the column position of every distinct header name and creates its value index.</summary>
        private void IndexHeaderColumns()
        {
            for (int column = 0; column < _fields.Length; column++)
            {
                string field = _fields[column];
                if (_fieldIndexDict.ContainsKey(field))
                {
                    // Duplicate header: the first column with this name keeps the mapping. The index is
                    // always the real column position, so later columns are not shifted.
                    continue;
                }
                _fieldIndexDict.Add(field, column);
                if (!_markerValueDict.ContainsKey(field))
                {
                    _markerValueDict.Add(field, new Dictionary<string, HashSet<int>>());
                }
            }
        }

        /// <summary>Links each marker of this test to the column whose header equals the marker's name.</summary>
        private void IndexMarkerColumns()
        {
            foreach (Marker marker in _markers)
            {
                if (marker.TestName == _testName)
                {
                    int markerIndex;
                    if (_fieldIndexDict.TryGetValue(marker.Name, out markerIndex))
                    {
                        _markerIndices.Add(marker, markerIndex);
                        _indexMarkerDict.Add(markerIndex, marker);
                    }
                }
            }
        }

        /// <summary>Stores one data row and registers each of its cell values in the per-column value index.</summary>
        private void AddRow(string[] cells)
        {
            int rowIndex = _lines.Count;
            _lines.Add(cells);
            for (int column = 0; column < cells.Length; column++)
            {
                Dictionary<string, HashSet<int>> valueRows;
                if (!_markerValueDict.TryGetValue(_fields[column], out valueRows))
                {
                    continue;
                }
                HashSet<int> rowsWithValue;
                if (!valueRows.TryGetValue(cells[column], out rowsWithValue))
                {
                    rowsWithValue = new HashSet<int>();
                    valueRows.Add(cells[column], rowsWithValue);
                }
                rowsWithValue.Add(rowIndex);
            }
        }

        /// <summary>Gets the indices of the columns that are not marker columns, in ascending order.</summary>
        private List<int> GetExtraInfoIndices()
        {
            var indices = new List<int>();
            if (_fields == null)
            {
                // Nothing has been read yet: there are no columns at all.
                return indices;
            }
            for (int column = 0; column < _fields.Length; column++)
            {
                if (!_indexMarkerDict.ContainsKey(column))
                {
                    indices.Add(column);
                }
            }
            return indices;
        }

        /// <summary>
        /// Starts from all row indices and keeps only the rows that, for every considered match, hold
        /// either the match's result or an empty (wildcard) cell in the marker's column.
        /// </summary>
        /// <param name="markerMatches">Matches to filter by.</param>
        /// <param name="fuzzyMatching">Whether the allele number is used as the result instead of the marker call.</param>
        /// <param name="knownMarkersOnly">When true, matches whose marker is not a marker column of this test are ignored.</param>
        /// <param name="fuzzyAllelicOnly">When true, fuzzy matching is applied only to markers whose typing test is allelic.</param>
        /// <returns>
        /// The compatible row indices, or null when some marker has neither a row with its result nor a
        /// wildcard row (so no row can be compatible).
        /// </returns>
        private HashSet<int> FindCompatibleRows(List<MarkerMatch> markerMatches, bool fuzzyMatching,
                                                bool knownMarkersOnly, bool fuzzyAllelicOnly)
        {
            var rows = new HashSet<int>();
            for (int i = 0; i < _lines.Count; i++)
            {
                rows.Add(i);
            }

            foreach (MarkerMatch match in markerMatches)
            {
                Marker marker = match.Marker;
                if (knownMarkersOnly && !_markerIndices.ContainsKey(marker))
                {
                    continue;
                }

                Dictionary<string, HashSet<int>> markerValues;
                if (!_markerValueDict.TryGetValue(marker.Name, out markerValues))
                {
                    continue;
                }

                bool useAlleleNumber = fuzzyMatching
                                       && (!fuzzyAllelicOnly || marker.TypingTest == TestType.Allelic);
                string result = useAlleleNumber
                    ? Misc.NumberRegex.Match(match.AlleleMatch).Value
                    : match.MarkerCall;

                HashSet<int> resultRows;
                HashSet<int> wildcardRows;
                bool hasResult = markerValues.TryGetValue(result, out resultRows);
                bool hasWildcard = markerValues.TryGetValue("", out wildcardRows);

                if (hasResult && hasWildcard)
                {
                    // Combine into a copy: the sets stored in _markerValueDict are the shared index and
                    // must not be modified by a lookup.
                    var allowedRows = new HashSet<int>(resultRows);
                    allowedRows.UnionWith(wildcardRows);
                    rows.IntersectWith(allowedRows);
                }
                else if (hasResult)
                {
                    rows.IntersectWith(resultRows);
                }
                else if (hasWildcard)
                {
                    rows.IntersectWith(wildcardRows);
                }
                else
                {
                    return null;
                }
            }
            return rows;
        }

        /// <summary>
        /// Builds a header -> value dictionary for the given columns, taking values from
        /// <paramref name="cells"/>, or using empty strings when <paramref name="cells"/> is null.
        /// </summary>
        private Dictionary<string, string> BuildExtraInfoRow(List<int> columns, string[] cells)
        {
            var row = new Dictionary<string, string>();
            foreach (int column in columns)
            {
                string header = _fields[column];
                if (row.ContainsKey(header))
                {
                    // Duplicate header names are tolerated when reading; keep the first column's value
                    // instead of failing on a duplicate key.
                    continue;
                }
                row.Add(header, cells == null ? "" : cells[column]);
            }
            return row;
        }
    }
}