PageRenderTime 34ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/Source/Framework/Bio/IO/GenBank/LocationResolver.cs

#
C# | 608 lines | 421 code | 67 blank | 120 comment | 99 complexity | 262da47c16306507d6909faf2568d9c9 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, CPL-1.0
  1. // *********************************************************
  2. //
  3. // Copyright (c) Microsoft. All rights reserved.
  4. // This code is licensed under the Apache License, Version 2.0.
  5. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  6. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  7. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  8. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  9. //
  10. // *********************************************************
  11. using System;
  12. using System.Collections.Generic;
  13. using System.Globalization;
  14. using System.Linq;
  15. namespace Bio.IO.GenBank
  16. {
  17. /// <summary>
  18. /// This is the default implementation of ILocationResolver.
  19. /// This class resolves the start and end positions of a location.
  20. ///
  21. /// Please see the following table for how this class resolves the ambiguities in start and end data.
  22. ///
  23. /// Start/End Data Resolved Start Resolved End
  24. /// 12.30 12 30
  25. /// &gt;30 30 30
  26. /// &lt;30 30 30
  27. /// 23^24 23 24
  28. /// 100^1 100 1
  29. /// </summary>
  30. public class LocationResolver : ILocationResolver
  31. {
  32. #region Constructors
  /// <summary>
  /// Initializes a new instance of the LocationResolver class.
  /// The class declares no fields, so there is nothing to set up here.
  /// </summary>
  public LocationResolver()
  {
  }
  40. #endregion Constructors
  41. #region Public Methods
  /// <summary>
  /// Creates a new LocationResolver that is equivalent to this instance.
  /// </summary>
  /// <returns>A freshly constructed LocationResolver.</returns>
  public LocationResolver Clone()
  {
      LocationResolver copy = new LocationResolver();
      return copy;
  }
  49. #endregion Public Methods
  50. #region ILocationResolver Members
  /// <summary>
  /// Returns the start position of the specified location.
  /// When the location has sub-locations, the smallest LocationStart among them is returned;
  /// otherwise the start-data of the location itself is resolved.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <returns>The resolved start position.</returns>
  /// <exception cref="ArgumentNullException">The location is null.</exception>
  /// <exception cref="ArgumentException">The location has no sub-locations and its start-data is null or empty.</exception>
  /// <exception cref="FormatException">The start-data cannot be resolved to a position.</exception>
  public int GetStart(ILocation location)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      // Take the minimum directly rather than sorting the whole list just to read its first element.
      if (location.SubLocations.Count > 0)
      {
          return location.SubLocations.Min(subLocation => subLocation.LocationStart);
      }

      if (string.IsNullOrEmpty(location.StartData))
      {
          throw new ArgumentException(Properties.Resource.StartDataCannotBeNull);
      }

      return ResolveStart(location.StartData);
  }
  /// <summary>
  /// Returns the end position of the specified location.
  /// When the location has sub-locations, the largest LocationEnd among them is returned;
  /// otherwise the end-data of the location itself is resolved.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <returns>The resolved end position.</returns>
  /// <exception cref="ArgumentNullException">The location is null.</exception>
  /// <exception cref="InvalidOperationException">The location has no sub-locations and its end-data is null or empty.</exception>
  /// <exception cref="FormatException">The end-data cannot be resolved to a position.</exception>
  public int GetEnd(ILocation location)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      // Take the maximum directly rather than sorting the whole list just to read its first element.
      if (location.SubLocations.Count > 0)
      {
          return location.SubLocations.Max(subLocation => subLocation.LocationEnd);
      }

      if (string.IsNullOrEmpty(location.EndData))
      {
          throw new InvalidOperationException(Properties.Resource.EndDataCannotBeNull);
      }

      return ResolveEnd(location.EndData);
  }
  /// <summary>
  /// Returns the bases of the specified sequence that are covered by the location.
  /// This overload supplies no referred sequences: it forwards to the three-argument
  /// overload with a null dictionary, so a location that carries an accession cannot be
  /// resolved and results in an exception.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <param name="sequence">Sequence from which the sub sequence has to be returned.</param>
  /// <returns>The sequence built for the location.</returns>
  public ISequence GetSubSequence(ILocation location, ISequence sequence)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      if (sequence == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameSequence);
      }

      // No accession lookup table is available through this overload.
      Dictionary<string, ISequence> noReferredSequences = null;
      return GetSubSequence(location, sequence, noReferredSequences);
  }
  /// <summary>
  /// Builds the sequence described by the location, taking bases from the specified sequence or,
  /// for parts that carry an accession, from the matching entry in referredSequences.
  ///
  /// For example,
  /// if location is "join(100..200, J00089.1:10..50, J00090.2:30..40)"
  /// then bases from 100 to 200 are taken from the sequence parameter and referredSequences is
  /// searched for the J00089.1 and J00090.2 accessions to supply the remaining bases.
  /// If a referred sequence is not found in referredSequences then an exception will occur.
  ///
  /// Handling per operator:
  /// Complement - at most one sub-location is allowed; the target bases are read through a DerivedSequence.
  /// Order, Join, Bond - the sub-sequences of all sub-locations are concatenated, in order, into one Sequence.
  /// Any other operator - sub-locations are not allowed; the start/end range of the location itself is used.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <param name="sequence">Sequence instance from which the sub sequence has to be returned.</param>
  /// <param name="referredSequences">A dictionary containing Accession numbers as keys and Sequences as values, this will be used when
  /// the location or sub-locations contains accession. May be null when no accession is involved.</param>
  /// <returns>The sequence built for the location.</returns>
  /// <exception cref="ArgumentNullException">The location or the sequence is null.</exception>
  /// <exception cref="ArgumentException">A complement has more than one sub-location, or a location whose operator
  /// is not Complement, Order, Join or Bond has sub-locations.</exception>
  public ISequence GetSubSequence(ILocation location, ISequence sequence, Dictionary<string, ISequence> referredSequences)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      if (sequence == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameSequence);
      }

      bool hasSubLocations = location.SubLocations.Count > 0;

      if (location.Operator == LocationOperator.Complement)
      {
          if (location.SubLocations.Count > 1)
          {
              throw new ArgumentException(Properties.Resource.ComplementWithMorethanOneSubLocs);
          }

          ISequence target = hasSubLocations
              ? location.SubLocations[0].GetSubSequence(sequence, referredSequences)
              : GetSubSequence(location.LocationStart, location.LocationEnd, location.Accession, location.Separator, sequence, referredSequences);

          // NOTE(review): the two flags are presumably (reverse: false, complement: true) - confirm against DerivedSequence.
          DerivedSequence complemented = new DerivedSequence(target, false, true);
          return new Sequence(sequence.Alphabet, CopyDerivedSequenceData(complemented));
      }

      if (location.Operator == LocationOperator.Order)
      {
          List<ISequence> orderedParts;
          if (hasSubLocations)
          {
              orderedParts = CollectSubSequences(location, sequence, referredSequences);
          }
          else
          {
              // Without sub-locations the location's own range is the only part.
              DerivedSequence range = new DerivedSequence(
                  GetSubSequence(location.LocationStart, location.LocationEnd, location.Accession, location.Separator, sequence, referredSequences),
                  false,
                  false);
              orderedParts = new List<ISequence>();
              orderedParts.Add(new Sequence(sequence.Alphabet, CopyDerivedSequenceData(range)));
          }

          // The result carries the alphabet of the last part (the list is never empty here).
          IAlphabet alphabet = orderedParts[orderedParts.Count - 1].Alphabet;
          return new Sequence(alphabet, ConcatenateSequenceData(orderedParts));
      }

      if (hasSubLocations)
      {
          bool joinsSubLocations = location.Operator == LocationOperator.Join || location.Operator == LocationOperator.Bond;
          if (!joinsSubLocations)
          {
              throw new ArgumentException(Properties.Resource.NoneWithSubLocs);
          }

          List<ISequence> joinedParts = CollectSubSequences(location, sequence, referredSequences);
          return new Sequence(sequence.Alphabet, ConcatenateSequenceData(joinedParts));
      }

      // No sub-locations: the location is a plain start/end range.
      return GetSubSequence(location.LocationStart, location.LocationEnd, location.Accession, location.Separator, sequence, referredSequences);
  }

  /// <summary>
  /// Resolves every sub-location of the location to its sub-sequence, preserving the sub-location order.
  /// </summary>
  private static List<ISequence> CollectSubSequences(ILocation location, ISequence sequence, Dictionary<string, ISequence> referredSequences)
  {
      List<ISequence> parts = new List<ISequence>();
      foreach (ILocation subLocation in location.SubLocations)
      {
          parts.Add(subLocation.GetSubSequence(sequence, referredSequences));
      }

      return parts;
  }

  /// <summary>
  /// Copies the items of all given sequences, one sequence after the other, into a single byte array.
  /// </summary>
  private static byte[] ConcatenateSequenceData(List<ISequence> parts)
  {
      long totalLength = 0;
      foreach (ISequence part in parts)
      {
          totalLength += part.Count;
      }

      byte[] data = new byte[totalLength];
      long offset = 0;
      foreach (ISequence part in parts)
      {
          long partLength = part.Count;
          for (long index = 0; index < partLength; index++)
          {
              data[offset] = part[index];
              offset++;
          }
      }

      return data;
  }

  /// <summary>
  /// Copies the items exposed by the derived sequence into a new byte array.
  /// </summary>
  private static byte[] CopyDerivedSequenceData(DerivedSequence derived)
  {
      byte[] data = new byte[derived.Count];
      for (long index = 0; index < derived.Count; index++)
      {
          data[index] = derived[index];
      }

      return data;
  }
  /// <summary>
  /// Checks whether the specified position lies within the start-data of any leaf location.
  /// For example,
  /// if the start-data of a location is "23.40", this method
  /// returns true for the position values ranging from 23 to 40.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <param name="position">Position to be verified.</param>
  /// <returns>Returns true if the specified position is within the start position, else returns false.</returns>
  public bool IsInStart(ILocation location, int position)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      foreach (ILocation leaf in location.GetLeafLocations())
      {
          // The same start-data yields both bounds: its resolved start and its resolved end.
          int lowerBound = ResolveStart(leaf.StartData);
          int upperBound = ResolveEnd(leaf.StartData);
          if (position < lowerBound || position > upperBound)
          {
              continue;
          }

          return true;
      }

      return false;
  }
  /// <summary>
  /// Checks whether the specified position lies within the end-data of any leaf location.
  /// For example,
  /// if the end-data of a location is "23.40", this method
  /// returns true for the position values ranging from 23 to 40.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <param name="position">Position to be verified.</param>
  /// <returns>Returns true if the specified position is within the end position, else returns false.</returns>
  public bool IsInEnd(ILocation location, int position)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      foreach (ILocation leaf in location.GetLeafLocations())
      {
          // The same end-data yields both bounds; the upper bound is resolved first.
          int upperBound = ResolveEnd(leaf.EndData);
          int lowerBound = ResolveStart(leaf.EndData);
          if (position < lowerBound || position > upperBound)
          {
              continue;
          }

          return true;
      }

      return false;
  }
  /// <summary>
  /// Checks whether the specified position falls between the start and end positions
  /// (both inclusive) of any leaf location.
  /// </summary>
  /// <param name="location">Location instance.</param>
  /// <param name="position">Position to be verified.</param>
  /// <returns>Returns true if the specified position is within the start and end positions, else returns false.</returns>
  public bool IsInRange(ILocation location, int position)
  {
      if (location == null)
      {
          throw new ArgumentNullException(Properties.Resource.ParameterNameLocation);
      }

      foreach (ILocation leaf in location.GetLeafLocations())
      {
          if (position < leaf.LocationStart)
          {
              continue;
          }

          if (position <= leaf.LocationEnd)
          {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Explicit ILocationResolver implementation of Clone; it forwards to the public Clone method.
  /// </summary>
  /// <returns>A new ILocationResolver that is a copy of this ILocationResolver.</returns>
  ILocationResolver ILocationResolver.Clone()
  {
      LocationResolver copy = Clone();
      return copy;
  }
  340. #endregion
  341. #region Private Methods
  /// <summary>
  /// Resolves and returns the start position encoded in the start data.
  ///
  /// Accepted forms:
  /// a plain integer, which is returned as is;
  /// a single leading '&gt;' or '&lt;', which is dropped before resolving the rest;
  /// "a^b" or "a.b" with exactly one separator and two non-empty parts, which resolves to the part before the separator.
  /// </summary>
  /// <param name="str">Start data.</param>
  /// <returns>The resolved start position.</returns>
  /// <exception cref="FormatException">The start data is null, empty or not in one of the accepted forms.</exception>
  private static int ResolveStart(string str)
  {
      // Null would otherwise surface as a NullReferenceException further down;
      // report it like any other unresolvable data.
      if (string.IsNullOrEmpty(str))
      {
          throw NewInvalidStartDataException(str);
      }

      // Position data is machine-readable, so it is parsed independently of the current culture.
      int value;
      if (int.TryParse(str, NumberStyles.Integer, CultureInfo.InvariantCulture, out value))
      {
          return value;
      }

      char first = str[0];
      if (first == '>' || first == '<')
      {
          // The prefix may appear only once, at the very beginning.
          if (str.LastIndexOf(first) != 0)
          {
              throw NewInvalidStartDataException(str);
          }

          return ResolveStart(str.Substring(1));
      }

      // '^' takes precedence over '.' when both are present.
      char[] separators = { '^', '.' };
      foreach (char separator in separators)
      {
          int firstIndex = str.IndexOf(separator);
          if (firstIndex < 0)
          {
              continue;
          }

          if (firstIndex != str.LastIndexOf(separator))
          {
              throw NewInvalidStartDataException(str);
          }

          string[] values = str.Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries);
          if (values.Length != 2)
          {
              throw NewInvalidStartDataException(str);
          }

          return ResolveStart(values[0]);
      }

      throw NewInvalidStartDataException(str);
  }

  /// <summary>
  /// Creates the FormatException that reports the specified start data as invalid.
  /// </summary>
  private static FormatException NewInvalidStartDataException(string startData)
  {
      string message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidStartData, startData);
      return new FormatException(message);
  }
  /// <summary>
  /// Resolves and returns the end position encoded in the end data.
  ///
  /// Accepted forms:
  /// a plain integer, which is returned as is;
  /// a single leading '&gt;' or '&lt;', which is dropped before resolving the rest;
  /// data with exactly one '^' or '.' separator, which resolves to the last non-empty part
  /// (a single remaining part, as in "12." or "^24", is accepted).
  /// </summary>
  /// <param name="str">End data.</param>
  /// <returns>The resolved end position.</returns>
  /// <exception cref="FormatException">The end data is null, empty or not in one of the accepted forms.</exception>
  private static int ResolveEnd(string str)
  {
      // Null would otherwise surface as a NullReferenceException further down;
      // report it like any other unresolvable data.
      if (string.IsNullOrEmpty(str))
      {
          throw NewInvalidEndDataException(str);
      }

      // Position data is machine-readable, so it is parsed independently of the current culture.
      int value;
      if (int.TryParse(str, NumberStyles.Integer, CultureInfo.InvariantCulture, out value))
      {
          return value;
      }

      char first = str[0];
      if (first == '>' || first == '<')
      {
          // The prefix may appear only once, at the very beginning.
          if (str.LastIndexOf(first) != 0)
          {
              throw NewInvalidEndDataException(str);
          }

          // Recurse as end data so nested data resolves to its end and errors mention end data.
          return ResolveEnd(str.Substring(1));
      }

      // '^' takes precedence over '.' when both are present.
      char[] separators = { '^', '.' };
      foreach (char separator in separators)
      {
          int firstIndex = str.IndexOf(separator);
          if (firstIndex < 0)
          {
              continue;
          }

          if (firstIndex != str.LastIndexOf(separator))
          {
              throw NewInvalidEndDataException(str);
          }

          string[] values = str.Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries);

          // Data consisting of the separator alone leaves nothing to resolve; without this
          // check the index below would be -1.
          if (values.Length == 0)
          {
              throw NewInvalidEndDataException(str);
          }

          return ResolveEnd(values[values.Length - 1]);
      }

      throw NewInvalidEndDataException(str);
  }

  /// <summary>
  /// Creates the FormatException that reports the specified end data as invalid.
  /// </summary>
  private static FormatException NewInvalidEndDataException(string endData)
  {
      string message = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidEndData, endData);
      return new FormatException(message);
  }
  /// <summary>
  /// Returns the sequence for the specified start and end positions.
  /// If the accession is null or empty then the source sequence is used to construct the output sequence,
  /// otherwise the sequence registered under that accession in the referred sequences is used.
  ///
  /// The separator decides how much is read:
  /// "^" yields an empty sequence;
  /// ".." yields the bases from start to end, both inclusive;
  /// "." or a null/empty separator yields the single base at start.
  /// </summary>
  /// <param name="start">Start position (one based).</param>
  /// <param name="end">End position (one based, inclusive).</param>
  /// <param name="accession">Accession number.</param>
  /// <param name="sepataror">Start and End separator.</param>
  /// <param name="source">Source sequence.</param>
  /// <param name="referredSequences">Referred Sequences.</param>
  /// <returns>A new Sequence holding a copy of the selected bases.</returns>
  /// <exception cref="ArgumentException">The separator is not supported, the accession is not present in the
  /// referred sequences, or the referred sequence has a different alphabet than the source.</exception>
  private static ISequence GetSubSequence(int start, int end, string accession, string sepataror, ISequence source, Dictionary<string, ISequence> referredSequences)
  {
      if (string.Equals(sepataror, "^", StringComparison.Ordinal))
      {
          return new Sequence(source.Alphabet, string.Empty);
      }

      bool isSinglePosition = string.IsNullOrEmpty(sepataror) || string.Equals(sepataror, ".", StringComparison.Ordinal);
      if (!isSinglePosition && !string.Equals(sepataror, "..", StringComparison.Ordinal))
      {
          string str = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidSeparator, sepataror);
          throw new ArgumentException(str);
      }

      if (!string.IsNullOrEmpty(accession))
      {
          // One lookup both checks for the accession and fetches its sequence.
          ISequence referred;
          if (referredSequences == null || !referredSequences.TryGetValue(accession, out referred))
          {
              string str = string.Format(CultureInfo.CurrentCulture, Properties.Resource.AccessionSequenceNotFound, accession);
              throw new ArgumentException(str);
          }

          if (source.Alphabet != referred.Alphabet)
          {
              string str = string.Format(CultureInfo.CurrentCulture, Properties.Resource.InvalidReferredAlphabet, accession);
              throw new ArgumentException(str);
          }

          source = referred;
      }

      // The location start is one based whereas ISequence.GetSubSequence takes a zero based index.
      int zeroBasedStart = start - 1;
      int length = isSinglePosition ? 1 : end - zeroBasedStart;

      ISequence slice = source.GetSubSequence(zeroBasedStart, length);
      byte[] seqData = new byte[slice.Count];
      for (long i = 0; i < slice.Count; i++)
      {
          seqData[i] = slice[i];
      }

      return new Sequence(source.Alphabet, seqData);
  }
  539. #endregion Private Methods
  540. }
  541. }