/eLMM/CodePlex/MBF/WordMatch.cs

# · C# · 535 lines · 308 code · 67 blank · 160 comment · 64 complexity · d2d4c9e3637309dc669f63f5ba3c1a01 MD5 · raw file

  1. //*********************************************************
  2. //
  3. // Copyright (c) Microsoft Corporation. All rights reserved.
  4. //
  5. //
  6. //
  7. //
  8. //
  9. //
  10. //*********************************************************
  11. using System;
  12. using System.Collections.Generic;
  13. using Bio.Algorithms.Kmer;
  14. namespace Bio
  15. {
  16. /// <summary>
  17. /// WordMatch stores the region of similarity between two sequences.
  18. /// </summary>
  19. public class WordMatch : IComparable, IComparable<WordMatch>, IEquatable<WordMatch>
  20. {
  /// <summary>
  /// Number of positions covered by the match.
  /// </summary>
  private int length;

  /// <summary>
  /// Position in the first sequence at which the match begins.
  /// </summary>
  private int sequence1Start;

  /// <summary>
  /// Position in the second sequence at which the match begins.
  /// </summary>
  private int sequence2Start;

  /// <summary>
  /// Initializes a new instance of the WordMatch class from a match length
  /// and the start position of the match in each of the two sequences.
  /// </summary>
  /// <param name="length">Length of the match.</param>
  /// <param name="sequence1Start">Start index of the match in the first sequence.</param>
  /// <param name="sequence2Start">Start index of the match in the second sequence.</param>
  public WordMatch(int length, int sequence1Start, int sequence2Start)
  {
      // The properties are plain, non-virtual wrappers over the backing fields,
      // so assigning through them is the same as writing the fields directly.
      Length = length;
      Sequence1Start = sequence1Start;
      Sequence2Start = sequence2Start;
  }
  /// <summary>
  /// Gets or sets the number of positions covered by the match.
  /// </summary>
  public int Length
  {
      get
      {
          return this.length;
      }

      set
      {
          this.length = value;
      }
  }

  /// <summary>
  /// Gets or sets the position in the first sequence at which the match begins.
  /// </summary>
  public int Sequence1Start
  {
      get
      {
          return this.sequence1Start;
      }

      set
      {
          this.sequence1Start = value;
      }
  }

  /// <summary>
  /// Gets or sets the position in the second sequence at which the match begins.
  /// </summary>
  public int Sequence2Start
  {
      get
      {
          return this.sequence2Start;
      }

      set
      {
          this.sequence2Start = value;
      }
  }

  /// <summary>
  /// Gets or sets a value indicating whether this match has been flagged as
  /// deleted. The list-processing methods of this class skip flagged matches.
  /// </summary>
  public bool Deleted { get; set; }
  /// <summary>
  /// Given a list of matches, reduce it to the minimal set of best
  /// non-overlapping matches.
  /// </summary>
  /// <remarks>
  /// The supplied list is sorted in place, and the matches it contains may be
  /// modified: a match that partly overlaps a better match can be trimmed, and
  /// discarded matches are left with <see cref="Deleted"/> set to true.
  /// </remarks>
  /// <param name="completeList">List of matches to reduce to non-overlapping set.</param>
  /// <param name="wordLength">Wordlength entered by the user.</param>
  /// <returns>Minimal set of best non-overlapping matches.</returns>
  /// <exception cref="ArgumentNullException">completeList is null.</exception>
  public static List<WordMatch> GetMinimalList(List<WordMatch> completeList, int wordLength)
  {
      if (completeList == null)
      {
          throw new ArgumentNullException("completeList");
      }

      // CompareTo orders longer matches first, so the best matches claim
      // their region before shorter, overlapping ones are examined.
      completeList.Sort();

      foreach (WordMatch wordMatch in completeList)
      {
          if (wordMatch.Deleted)
          {
              continue;
          }

          // First and last positions of the current match on each sequence.
          int deadx1 = wordMatch.sequence1Start;
          int deady1 = wordMatch.sequence2Start;
          int deadx2 = wordMatch.sequence1Start + wordMatch.length - 1;
          int deady2 = wordMatch.sequence2Start + wordMatch.length - 1;

          foreach (WordMatch innerWordMatch in completeList)
          {
              // Identity check: a match is never tested against itself, but a
              // different match with identical values still is.
              if (object.ReferenceEquals(wordMatch, innerWordMatch) || innerWordMatch.Deleted)
              {
                  continue;
              }

              // Flag the other match when it falls in the dead zone of the
              // current one (or is trimmed below the minimum word length).
              if (WordDeadZone(innerWordMatch, deadx1, deady1, deadx2, deady2, wordLength))
              {
                  innerWordMatch.Deleted = true;
              }
          }
      }

      // Collect the survivors, preserving the order of the sorted list.
      List<WordMatch> minimalList = new List<WordMatch>();
      foreach (WordMatch wordMatch in completeList)
      {
          if (!wordMatch.Deleted)
          {
              minimalList.Add(wordMatch);
          }
      }

      return minimalList;
  }
  /// <summary>
  /// Creates a list of all the word matches between the k-mers of the first
  /// sequence and the words of the second sequence, extending a match when
  /// consecutive words continue it.
  /// </summary>
  /// <remarks>
  /// The returned list is sorted using <see cref="CompareTo(WordMatch)"/> and
  /// every match in it has its <see cref="Deleted"/> flag cleared.
  /// </remarks>
  /// <param name="kmerList">List of kmer's.</param>
  /// <param name="seq1">First sequence. Not read by this method.</param>
  /// <param name="seq2">Second sequence.</param>
  /// <param name="wordLength">Wordlength entered by the user</param>
  /// <returns>List of all the matches.</returns>
  /// <exception cref="ArgumentNullException">kmerList or seq2 is null.</exception>
  public static List<WordMatch> BuildMatchTable(KmersOfSequence kmerList, ISequence seq1, ISequence seq2, int wordLength)
  {
      if (kmerList == null)
      {
          throw new ArgumentNullException("kmerList");
      }

      if (seq2 == null)
      {
          throw new ArgumentNullException("seq2");
      }

      List<WordMatch> wordCurList = new List<WordMatch>();

      // Last position in seq2 at which a full word still fits.
      int ilast = seq2.Count - wordLength;

      for (int i = 0; i <= ilast; i++)
      {
          IList<int> positions = FindCorrespondingMatch(seq2.Range(i, wordLength), kmerList);
          if (positions == null)
          {
              // The word starting at i has no matching k-mer.
              continue;
          }

          foreach (int position in positions)
          {
              bool matched = false;

              foreach (WordMatch curmatch in wordCurList)
              {
                  if (curmatch.Deleted)
                  {
                      continue;
                  }

                  // Word start positions (in seq1 and seq2) that would
                  // extend this match by one.
                  int kcur = curmatch.sequence1Start + curmatch.length - wordLength + 1;
                  int kcur2 = curmatch.sequence2Start + curmatch.length - wordLength + 1;

                  // When we test, we may have already incremented
                  // one of the matches - so test old and new kcur2
                  if (kcur2 != i && kcur2 != i + 1)
                  {
                      // This match can no longer be extended; stop considering it.
                      curmatch.Deleted = true;
                      continue;
                  }

                  if (kcur == position && kcur2 == i)
                  {
                      curmatch.length++;
                      matched = true;
                  }
              }

              if (!matched)
              {
                  // No existing match was extended: start a new one here.
                  wordCurList.Add(new WordMatch(wordLength, position, i));
              }
          }
      }

      wordCurList.Sort();

      // Deleted was only used as a "no longer extendable" marker above.
      foreach (WordMatch curmatch in wordCurList)
      {
          curmatch.Deleted = false;
      }

      return wordCurList;
  }
  #region IComparable Members
  /// <summary>
  /// CompareTo method is used while sorting WordMatch objects.
  /// </summary>
  /// <param name="obj">Object to compare with, expected to be a WordMatch.</param>
  /// <returns>
  /// A positive value if <paramref name="obj"/> is null, since any instance
  /// compares greater than null.
  /// -1 if <paramref name="obj"/> is not a WordMatch.
  /// Otherwise the result of the strongly typed comparison: zero if both
  /// objects have the same length, sequence1Start and sequence2Start; longer
  /// matches sort first, ties are ordered by sequence1Start and then by
  /// sequence2Start.
  /// </returns>
  public int CompareTo(object obj)
  {
      if (object.ReferenceEquals(obj, null))
      {
          // IComparable contract: every instance is greater than null.
          return 1;
      }

      WordMatch other = obj as WordMatch;
      if (object.ReferenceEquals(other, null))
      {
          // Not a WordMatch: keep the historical result rather than
          // introducing an exception that existing callers do not expect.
          return -1;
      }

      return CompareTo(other);
  }
  #endregion
  #region IComparable<WordMatch> Members
  /// <summary>
  /// Compares two sequence matches so the result can be used in sorting.
  /// Longer matches sort first. If the lengths are equal the matches are
  /// ordered by sequence1 start position, and if those are equal as well,
  /// by sequence2 start position.
  /// </summary>
  /// <param name="other">WordMatch object</param>
  /// <returns>
  /// Zero if the objects have the same length, sequence1Start and sequence2Start.
  /// A negative value if this instance sorts before <paramref name="other"/>,
  /// a positive value if it sorts after it or if <paramref name="other"/> is null.
  /// </returns>
  public int CompareTo(WordMatch other)
  {
      if (object.ReferenceEquals(other, null))
      {
          // IComparable contract: every instance is greater than null.
          return 1;
      }

      // int.CompareTo is used instead of subtraction so that extreme values
      // cannot overflow and flip the sign of the result.

      // Operands are swapped on purpose: longer matches come first.
      int result = other.length.CompareTo(length);
      if (result != 0)
      {
          return result;
      }

      result = sequence1Start.CompareTo(other.sequence1Start);
      if (result != 0)
      {
          return result;
      }

      return sequence2Start.CompareTo(other.sequence2Start);
  }
  #endregion
  #region IEquatable<WordMatch> Members
  /// <summary>
  /// Checks if another WordMatch object is equal to the current
  /// object. Two matches are equal when their length, sequence1Start and
  /// sequence2Start are all the same; the Deleted flag is not compared.
  /// </summary>
  /// <param name="other">WordMatch object to be compared.</param>
  /// <returns>
  /// true: if the objects are equal else false. A null
  /// <paramref name="other"/> is never equal to this instance.
  /// </returns>
  public bool Equals(WordMatch other)
  {
      // ReferenceEquals avoids going through the overloaded == operator.
      if (object.ReferenceEquals(other, null))
      {
          return false;
      }

      return length == other.length
          && sequence1Start == other.sequence1Start
          && sequence2Start == other.sequence2Start;
  }
  #endregion
  /// <summary>
  /// Decides whether a match has to be discarded because of a reference match
  /// that runs from (deadx1, deady1) to (deadx2, deady2).
  /// A match lying completely before or completely after the reference match
  /// on both sequences is kept untouched. A match lying inside the region
  /// blocked by the reference match is discarded. Any other match may be
  /// trimmed in place (its length and start positions can change) and is
  /// discarded only if what remains is shorter than the word length.
  /// </summary>
  /// <param name="wordMatch">Match to test; may be modified by this method.</param>
  /// <param name="deadx1">First position of the reference match in the first sequence.</param>
  /// <param name="deady1">First position of the reference match in the second sequence.</param>
  /// <param name="deadx2">Last position of the reference match in the first sequence.</param>
  /// <param name="deady2">Last position of the reference match in the second sequence.</param>
  /// <param name="wordLength">Wordlength entered by the user</param>
  /// <returns>
  /// true: if the match should be discarded, else false.
  /// </returns>
  private static bool WordDeadZone(WordMatch wordMatch, int deadx1, int deady1, int deadx2, int deady2, int wordLength)
  {
      int firstX = wordMatch.sequence1Start;
      int firstY = wordMatch.sequence2Start;
      int lastX = firstX + wordMatch.length - 1;
      int lastY = firstY + wordMatch.length - 1;

      // Live zones: the match starts after the reference match ends on both
      // sequences, or ends before the reference match starts on both.
      bool startsAfterReference = firstX > deadx2 && firstY > deady2;
      bool endsBeforeReference = lastX < deadx1 && lastY < deady1;
      if (startsAfterReference || endsBeforeReference)
      {
          return false;
      }

      // Dead zones: the match lies entirely within a blocked region.
      bool inTopLeftDeadZone = firstY >= deady1 && lastX <= deadx2;
      bool inBottomRightDeadZone = lastY <= deady2 && firstX >= deadx1;
      if (inTopLeftDeadZone || inBottomRightDeadZone)
      {
          return true;
      }

      bool trimEnd = lastY < deady2;
      bool trimStart = !trimEnd && firstY > deady1;

      if (trimEnd || trimStart)
      {
          // Compare the diagonal (x - y offset) of the match with that of the
          // reference match. On the same diagonal nothing is trimmed.
          int ownDiagonal = firstX - firstY;
          int deadDiagonal = deadx1 - deady1;

          if (trimEnd)
          {
              if (ownDiagonal < deadDiagonal)
              {
                  // Crosses deady1: keep only the part before it.
                  wordMatch.length = deady1 - firstY;
              }
              else if (ownDiagonal > deadDiagonal)
              {
                  // Crosses deadx1: keep only the part before it.
                  wordMatch.length = deadx1 - firstX;
              }
          }
          else
          {
              if (ownDiagonal < deadDiagonal)
              {
                  // Crosses deadx2: keep only the part after it.
                  wordMatch.length = lastX - deadx2;
                  wordMatch.sequence1Start = deadx2 + 1;
                  wordMatch.sequence2Start += deadx2 - firstX + 1;
              }
              else if (ownDiagonal > deadDiagonal)
              {
                  // Crosses deady2: keep only the part after it.
                  wordMatch.length = lastY - deady2;
                  wordMatch.sequence1Start += deady2 - firstY + 1;
                  wordMatch.sequence2Start = deady2 + 1;
              }
          }
      }

      // Whatever is left must still be at least one word long.
      return wordMatch.length < wordLength;
  }
  /// <summary>
  /// Looks up a sequence among the k-mers of a KmersOfSequence by comparing
  /// string representations, and returns the positions recorded for the first
  /// k-mer that matches.
  /// </summary>
  /// <param name="sequence">Sequence which has to be matched in the list of IKmer.</param>
  /// <param name="kmerList">List of IKmer.</param>
  /// <returns>
  /// The positions of the matching k-mer, or null when no k-mer matches.
  /// </returns>
  private static IList<int> FindCorrespondingMatch(ISequence sequence, KmersOfSequence kmerList)
  {
      string target = sequence.ToString();

      foreach (KmersOfSequence.KmerPositions candidate in kmerList.Kmers)
      {
          string candidateText = kmerList.KmerToSequence(candidate).ToString();
          if (target.Equals(candidateText))
          {
              // Only the first matching k-mer is used.
              return candidate.Positions;
          }
      }

      return null;
  }
  /// <summary>
  /// Returns a hash code computed from length, sequence1Start and
  /// sequence2Start, so that matches which are equal according to
  /// Equals produce the same hash code.
  /// </summary>
  /// <remarks>
  /// The hash code changes when any of those three values changes, so a
  /// WordMatch should not be modified while it is stored as a key in a
  /// hash-based collection.
  /// </remarks>
  /// <returns>hash code</returns>
  public override int GetHashCode()
  {
      // Wrap-around on overflow is expected when combining hash values.
      unchecked
      {
          int hash = 17;
          hash = (hash * 31) + length;
          hash = (hash * 31) + sequence1Start;
          hash = (hash * 31) + sequence2Start;
          return hash;
      }
  }

  /// <summary>
  /// Determines whether the given object is a WordMatch of the same runtime
  /// type with the same length, sequence1Start and sequence2Start.
  /// The Deleted flag is not compared.
  /// </summary>
  /// <param name="obj">Object to be checked</param>
  /// <returns>Is equals</returns>
  public override bool Equals(object obj)
  {
      if (object.ReferenceEquals(obj, null) || GetType() != obj.GetType())
      {
          return false;
      }

      // Same value comparison as the strongly typed Equals(WordMatch), so the
      // two overloads can never disagree.
      WordMatch other = (WordMatch)obj;
      return length == other.length
          && sequence1Start == other.sequence1Start
          && sequence2Start == other.sequence2Start;
  }
  /// <summary>
  /// Equality operator. Compares by reference: the result is true only when
  /// both operands are the same instance (or both are null).
  /// </summary>
  /// <param name="leftHandSideObject">LHS object</param>
  /// <param name="rightHandSideObject">RHS object</param>
  /// <returns>true if LHS and RHS refer to the same instance, else false.</returns>
  public static bool operator ==(WordMatch leftHandSideObject, WordMatch rightHandSideObject)
  {
      return object.ReferenceEquals(leftHandSideObject, rightHandSideObject);
  }
  /// <summary>
  /// Inequality operator. The exact negation of the equality operator.
  /// </summary>
  /// <param name="leftHandSideObject">LHS object</param>
  /// <param name="rightHandSideObject">RHS object</param>
  /// <returns>true if LHS == RHS is false, else false.</returns>
  public static bool operator !=(WordMatch leftHandSideObject, WordMatch rightHandSideObject)
  {
      bool same = leftHandSideObject == rightHandSideObject;
      return !same;
  }
  /// <summary>
  /// Less than operator, based on CompareTo.
  /// </summary>
  /// <param name="leftHandSideObject">LHS object</param>
  /// <param name="rightHandSideObject">RHS object</param>
  /// <returns>
  /// true if neither operand is null and LHS sorts before RHS, else false.
  /// </returns>
  public static bool operator <(WordMatch leftHandSideObject, WordMatch rightHandSideObject)
  {
      // A null operand on either side always yields false.
      return !object.ReferenceEquals(leftHandSideObject, null)
          && !object.ReferenceEquals(rightHandSideObject, null)
          && leftHandSideObject.CompareTo(rightHandSideObject) < 0;
  }
  /// <summary>
  /// Greater than operator, based on CompareTo.
  /// </summary>
  /// <param name="leftHandSideObject">LHS object</param>
  /// <param name="rightHandSideObject">RHS object</param>
  /// <returns>
  /// true if neither operand is null and LHS sorts after RHS, else false.
  /// </returns>
  public static bool operator >(WordMatch leftHandSideObject, WordMatch rightHandSideObject)
  {
      // A null operand on either side always yields false.
      bool bothPresent = !object.ReferenceEquals(leftHandSideObject, null)
          && !object.ReferenceEquals(rightHandSideObject, null);

      return bothPresent && leftHandSideObject.CompareTo(rightHandSideObject) > 0;
  }
  467. }
  468. }