/eLMM/CodePlex/MBF/IO/VirtualSequenceList.cs

# · C# · 397 lines · 201 code · 59 blank · 137 comment · 11 complexity · 69641547f19542bc3d597c348dc222a4 MD5 · raw file

  1. //*********************************************************
  2. //
  3. // Copyright (c) Microsoft Corporation. All rights reserved.
  4. //
  5. //
  6. //
  7. //
  8. //
  9. //
  10. //*********************************************************
  11. using System;
  12. using System.Collections;
  13. using System.Collections.Generic;
  14. using System.Linq;
  15. using System.IO;
  16. namespace Bio.IO
  17. {
  /// <summary>
  /// A read-only list of sequences whose items are not all held in memory at once.
  /// An item is built from the <see cref="SequencePointer"/> supplied by the sidecar
  /// file provider the first time its index is requested, and is then tracked through
  /// a weak reference so the same instance is returned for as long as it is alive.
  /// Example: a FastA file holding more than one sequence, where each sequence is
  /// loaded from the file through the parser only when it is asked for.
  /// </summary>
  public class VirtualSequenceList : IVirtualSequenceList
  {
      #region Fields

      /// <summary>
      /// Number of items in the actual file.
      /// </summary>
      private readonly int _count;

      /// <summary>
      /// SequencePointer provider from sidecar file.
      /// </summary>
      private readonly SidecarFileProvider _sidecarProvider;

      /// <summary>
      /// Parser used to parse sequence data on request.
      /// </summary>
      private readonly IVirtualSequenceParser _sequenceParser;

      /// <summary>
      /// Maps the index of a sequence in the actual file to a weak reference to the
      /// sequence object that was created for that index.
      /// </summary>
      private readonly Dictionary<int, WeakReference> _sequenceDictionary;

      #endregion

      #region Constructors

      /// <summary>
      /// Initializes a new instance of the VirtualSequenceList class with a specified provider,
      /// a specified parser, and a specified sequence count.
      /// </summary>
      /// <param name="provider">SequencePointer provider from sidecar file.</param>
      /// <param name="parser">Parser used to parse sequence data on request.</param>
      /// <param name="count">Number of items in the actual file.</param>
      public VirtualSequenceList(SidecarFileProvider provider, IVirtualSequenceParser parser, int count)
      {
          _sequenceParser = parser;
          _sidecarProvider = provider;
          _count = count;
          _sequenceDictionary = new Dictionary<int, WeakReference>(_count);
      }

      #endregion

      #region Properties

      /// <summary>
      /// Gets or sets a value indicating whether to set the sequence to read-only
      /// when reading from the sidecar file.
      /// </summary>
      /// <remarks>
      /// NOTE(review): nothing inside this class reads this property; confirm where
      /// (or whether) the flag is applied to the sequences this list creates.
      /// </remarks>
      public bool CreateSequenceAsReadOnly { get; set; }

      #endregion

      #region IList<ISequence> Members

      /// <summary>
      /// Returns the index of the sequence that is the exact same object as
      /// <paramref name="item"/>. This does not do a value-based comparison, and only
      /// sequences that this list has already created and that are still alive are
      /// considered.
      /// </summary>
      /// <param name="item">The sequence instance to locate.</param>
      /// <returns>the zero-based index of the sequence if found; otherwise, -1</returns>
      public int IndexOf(ISequence item)
      {
          // A weak reference whose sequence has been collected exposes a null Target,
          // so without this guard a null item would "match" a collected entry.
          if (item == null)
          {
              return -1;
          }

          foreach (KeyValuePair<int, WeakReference> entry in _sequenceDictionary)
          {
              if (ReferenceEquals(item, entry.Value.Target))
              {
                  return entry.Key;
              }
          }

          return -1;
      }

      /// <summary>
      /// This method is not supported since VirtualSequenceList is read-only.
      /// </summary>
      /// <exception cref="NotSupportedException">Always thrown.</exception>
      public void Insert(int index, ISequence item)
      {
          throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
      }

      /// <summary>
      /// This method is not supported since VirtualSequenceList is read-only.
      /// </summary>
      /// <exception cref="NotSupportedException">Always thrown.</exception>
      public void RemoveAt(int index)
      {
          throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
      }

      /// <summary>
      /// Gets the sequence at the specified index, creating it from the sidecar
      /// pointer if no live instance for that index is currently tracked.
      /// Throws a NotSupportedException when attempting to set the item
      /// since VirtualSequenceList is read-only.
      /// </summary>
      /// <param name="index">The zero-based index of the sequence in the list.</param>
      /// <returns>The sequence found at the specified index.</returns>
      /// <exception cref="ArgumentOutOfRangeException">
      /// <paramref name="index"/> is negative, or is not less than <see cref="Count"/>.
      /// </exception>
      /// <exception cref="InvalidCastException">
      /// The parser supplied to the constructor is not a Fasta.FastaParser.
      /// </exception>
      /// <exception cref="NotSupportedException">The setter is used.</exception>
      public ISequence this[int index]
      {
          get
          {
              if (index < 0 || index >= _count)
              {
                  throw new ArgumentOutOfRangeException("index");
              }

              WeakReference cachedReference;
              if (_sequenceDictionary.TryGetValue(index, out cachedReference))
              {
                  Sequence cachedSequence = cachedReference.Target as Sequence;
                  if (cachedSequence != null)
                  {
                      return cachedSequence;
                  }

                  // The tracked sequence is no longer alive; drop the stale entry
                  // so a fresh one can be added below.
                  _sequenceDictionary.Remove(index);
              }

              SequencePointer pointer = _sidecarProvider[index];

              // Get the alphabet from alphabet name.
              IAlphabet alphabet = Alphabets.All.Single(a => a.Name.Equals(pointer.AlphabetName));

              // A direct cast reports a parser of the wrong type as an InvalidCastException
              // rather than the NullReferenceException an unchecked "as" cast leads to.
              Fasta.FastaParser fastaParser = (Fasta.FastaParser)_sequenceParser;

              Sequence virtualSequence = new Sequence(alphabet)
              {
                  ID = fastaParser.GetSequenceID(pointer),
                  VirtualSequenceProvider = new FileVirtualSequenceProvider(_sequenceParser, pointer)
              };

              // Shrink the provider's block size when the span between the pointer's
              // starting and ending indices is smaller than the current block size.
              var pointerSpan = pointer.EndingIndex - pointer.StartingIndex;
              if (pointerSpan < virtualSequence.VirtualSequenceProvider.BlockSize)
              {
                  virtualSequence.VirtualSequenceProvider.BlockSize = (int)pointerSpan;
              }

              _sequenceDictionary.Add(index, new WeakReference(virtualSequence, false));
              return virtualSequence;
          }

          set
          {
              throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
          }
      }

      #endregion

      #region ICollection<ISequence> Members

      /// <summary>
      /// This method is not supported since VirtualSequenceList is read-only.
      /// </summary>
      /// <exception cref="NotSupportedException">Always thrown.</exception>
      public void Add(ISequence item)
      {
          throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
      }

      /// <summary>
      /// This method is not supported since VirtualSequenceList is read-only.
      /// </summary>
      /// <exception cref="NotSupportedException">Always thrown.</exception>
      public void Clear()
      {
          throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
      }

      /// <summary>
      /// Determines whether a specific sequence instance is in the virtual sequence list.
      /// The answer is based on <see cref="IndexOf"/>, so it is a reference comparison.
      /// </summary>
      /// <param name="item">The sequence to locate in the list.</param>
      /// <returns>true if the sequence is found in the list; otherwise, false</returns>
      public bool Contains(ISequence item)
      {
          return IndexOf(item) >= 0;
      }

      /// <summary>
      /// Copies the entire virtual sequence list to a compatible one-dimensional array,
      /// starting at the specified index of the target array.
      /// </summary>
      /// <param name="array">
      /// The one-dimensional array that is the destination of the elements
      /// copied from the current list. The array must have zero-based indexing.
      /// </param>
      /// <param name="arrayIndex">The zero-based index in the array at which copying begins.</param>
      /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative.</exception>
      /// <exception cref="ArgumentException">
      /// The space from <paramref name="arrayIndex"/> to the end of <paramref name="array"/>
      /// is smaller than <see cref="Count"/>.
      /// </exception>
      public void CopyTo(ISequence[] array, int arrayIndex)
      {
          if (array == null)
          {
              throw new ArgumentNullException(Properties.Resource.ParameterNameArray);
          }

          if (arrayIndex < 0)
          {
              throw new ArgumentOutOfRangeException("arrayIndex");
          }

          // Validate capacity before copying so a too-small destination is rejected
          // up front instead of being partially filled before the failure.
          if (array.Length - arrayIndex < _count)
          {
              throw new ArgumentException(
                  "The destination array does not have enough space after arrayIndex to hold every sequence in the list.",
                  "array");
          }

          int targetIndex = arrayIndex;
          foreach (ISequence sequence in this)
          {
              array[targetIndex++] = sequence;
          }
      }

      /// <summary>
      /// Gets the number of sequences in the list.
      /// </summary>
      public int Count
      {
          get { return _count; }
      }

      /// <summary>
      /// Gets the read-only status of the list. Always true.
      /// </summary>
      public bool IsReadOnly
      {
          get { return true; }
      }

      /// <summary>
      /// This method is not supported since VirtualSequenceList is read-only.
      /// </summary>
      /// <exception cref="NotSupportedException">Always thrown.</exception>
      public bool Remove(ISequence item)
      {
          throw new NotSupportedException(Properties.Resource.NotSupportedInVirtualSequence);
      }

      #endregion

      #region IEnumerable<ISequence> Members

      /// <summary>
      /// Get the enumerator to the sequences in the list.
      /// </summary>
      /// <returns>The enumerator to the sequences in the list.</returns>
      public IEnumerator<ISequence> GetEnumerator()
      {
          return new VirtualSequenceEnumerator(this);
      }

      #endregion

      #region IEnumerable Members

      /// <summary>
      /// Get the enumerator to the sequences in the list.
      /// </summary>
      /// <returns>The enumerator to the sequences in the list.</returns>
      IEnumerator IEnumerable.GetEnumerator()
      {
          return new VirtualSequenceEnumerator(this);
      }

      #endregion
  }
  /// <summary>
  /// Implementation of the enumerator for the VirtualSequenceList.
  /// It walks the supplied list by index and reads each item through the list's
  /// indexer at the time the current item is requested.
  /// </summary>
  internal class VirtualSequenceEnumerator : IEnumerator<ISequence>
  {
      #region Fields

      /// <summary>
      /// The list of sequences being enumerated.
      /// </summary>
      private readonly IList<ISequence> _sequences;

      /// <summary>
      /// The zero-based index of the current sequence in the list;
      /// -1 while the enumerator is positioned before the first item.
      /// </summary>
      private int _index;

      /// <summary>
      /// Track whether disposed has been called.
      /// </summary>
      private bool _disposed;

      #endregion

      #region Constructors

      /// <summary>
      /// Initializes an enumerator for the VirtualSequenceList, positioned
      /// before the first item.
      /// </summary>
      /// <param name="virtualSequenceList">The list whose items are to be enumerated.</param>
      public VirtualSequenceEnumerator(IList<ISequence> virtualSequenceList)
      {
          _sequences = virtualSequenceList;
          Reset();
      }

      #endregion

      #region IEnumerator<ISequence> Members

      /// <summary>
      /// The current item reference for the enumerator, or null while the
      /// enumerator is positioned before the first item.
      /// </summary>
      public ISequence Current
      {
          get
          {
              if (_index < 0)
              {
                  return null;
              }

              return _sequences[_index];
          }
      }

      #endregion

      #region IDisposable Members

      /// <summary>
      /// Marks the enumerator as disposed. The enumerator holds no resources
      /// of its own that need to be released.
      /// </summary>
      public void Dispose()
      {
          Dispose(true);
          GC.SuppressFinalize(this);
      }

      /// <summary>
      /// Records that disposal has happened; there is nothing to release.
      /// </summary>
      /// <param name="disposing">
      /// true when called from <see cref="Dispose()"/>; currently makes no difference
      /// because there are no managed or unmanaged resources to free.
      /// </param>
      private void Dispose(bool disposing)
      {
          // Check to see if Dispose has already been called.
          if (!_disposed)
          {
              // If disposing equals true, dispose all managed
              // and unmanaged resources.
              if (disposing)
              {
                  // No op
              }

              _disposed = true;
          }
      }

      #endregion

      #region IEnumerator Members

      /// <summary>
      /// The current item reference for the enumerator, or null while the
      /// enumerator is positioned before the first item.
      /// </summary>
      object IEnumerator.Current
      {
          get
          {
              // Go through the typed property so both views of Current agree and the
              // list is never indexed with -1 before the first MoveNext.
              return Current;
          }
      }

      /// <summary>
      /// Advances the enumerator to the next item.
      /// </summary>
      /// <returns>
      /// true if the enumerator moved to another item; false if it was already on the
      /// last item (or the list is empty), in which case the position is left unchanged.
      /// </returns>
      public bool MoveNext()
      {
          if (_index < (_sequences.Count - 1))
          {
              _index++;
              return true;
          }

          return false;
      }

      /// <summary>
      /// Resets the enumerator to its initial position, before the first item.
      /// </summary>
      public void Reset()
      {
          _index = -1;
      }

      #endregion
  }
  338. }