/Rhino.Etl.Core/Operations/NestedLoopsJoinOperation.cs

http://github.com/ayende/rhino-etl · C# · 173 lines · 106 code · 12 blank · 55 comment · 27 complexity · ff17406bf2ac41cccd1302f84732817a MD5 · raw file

  namespace Rhino.Etl.Core.Operations
  {
      using Enumerables;
      using System;
      using System.Collections.Generic;

      /// <summary>
      /// Joins two row sources by comparing every left row against every right row.
      /// The left part of the join is optional; if it is not specified, the rows of
      /// the current pipeline are used as the left input.
      /// </summary>
      public abstract class NestedLoopsJoinOperation : AbstractJoinOperation
      {
          /// <summary>
          /// Column name written into placeholder rows so they can be recognised later.
          /// It is a freshly generated GUID string, created once when the type is initialised.
          /// </summary>
          private static readonly string IsEmptyRowMarker = Guid.NewGuid().ToString();

          /// <summary>
          /// The rows currently being compared by <see cref="Execute"/>. The join helpers
          /// (<see cref="InnerJoin"/>, <see cref="LeftJoin"/>, <see cref="RightJoin"/>,
          /// <see cref="FullJoin"/>) read these to detect whether one side is a placeholder row.
          /// </summary>
          private Row currentRightRow, currentLeftRow;

          /// <summary>
          /// Registers an operation as the right part of the join.
          /// </summary>
          /// <param name="value">The operation to register on the right side.</param>
          /// <returns>This instance, so that calls can be chained.</returns>
          public NestedLoopsJoinOperation Right(IOperation value)
          {
              right.Register(value);
              return this;
          }

          /// <summary>
          /// Registers an operation as the left part of the join and records that a left
          /// side was supplied, so <see cref="Execute"/> does not feed the pipeline rows
          /// to the left side.
          /// </summary>
          /// <param name="value">The operation to register on the left side.</param>
          /// <returns>This instance, so that calls can be chained.</returns>
          public NestedLoopsJoinOperation Left(IOperation value)
          {
              left.Register(value);
              leftRegistered = true;
              return this;
          }

          /// <summary>
          /// Executes this operation.
          /// </summary>
          /// <remarks>
          /// The join runs in two passes:
          /// 1. Every left row is compared with every right row; each matching pair is merged
          ///    and yielded. A left row that matched nothing is offered to
          ///    <see cref="MatchJoinCondition"/> once more against a placeholder right row;
          ///    it is either merged with that placeholder or reported via <c>LeftOrphanRow</c>.
          /// 2. Every right row that was never matched in pass 1 is offered to
          ///    <see cref="MatchJoinCondition"/> against a placeholder left row; it is either
          ///    merged with that placeholder or reported via <c>RightOrphanRow</c>.
          /// This method is an iterator, so none of the work (including <c>PrepareForJoin</c>)
          /// happens until the returned sequence is enumerated.
          /// </remarks>
          /// <param name="rows">Rows in pipeline. These are only used if a left part of the join was not specified.</param>
          /// <returns>The merged rows produced by the join.</returns>
          public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
          {
              PrepareForJoin();

              // Used purely as a set: only the keys matter, the values are always null.
              Dictionary<Row, object> matchedRightRows = new Dictionary<Row, object>();

              // The right side is enumerated once per left row and once more for the
              // orphan pass, so it is wrapped in a CachingEnumerable
              // (presumably so the right operation itself only runs once - confirm in CachingEnumerable).
              CachingEnumerable<Row> rightEnumerable = new CachingEnumerable<Row>(
                  new EventRaisingEnumerator(right, right.Execute(null))
              );

              // When an explicit left side was registered it is executed with null input;
              // otherwise the pipeline rows are passed through it.
              IEnumerable<Row> leftRows = left.Execute(leftRegistered ? null : rows);

              foreach (Row leftRow in new EventRaisingEnumerator(left, leftRows))
              {
                  bool leftNeedOuterJoin = true;
                  currentLeftRow = leftRow;

                  foreach (Row rightRow in rightEnumerable)
                  {
                      currentRightRow = rightRow;
                      if (MatchJoinCondition(leftRow, rightRow))
                      {
                          leftNeedOuterJoin = false;
                          matchedRightRows[rightRow] = null;
                          yield return MergeRows(leftRow, rightRow);
                      }
                  }

                  if (leftNeedOuterJoin)
                  {
                      // No right row matched: let the join condition decide whether the
                      // left row should still be emitted (outer join) or is an orphan.
                      Row emptyRow = CreateEmptyRow();
                      currentRightRow = emptyRow;
                      if (MatchJoinCondition(leftRow, emptyRow))
                      {
                          yield return MergeRows(leftRow, emptyRow);
                      }
                      else
                      {
                          LeftOrphanRow(leftRow);
                      }
                  }
              }

              foreach (Row rightRow in rightEnumerable)
              {
                  if (matchedRightRows.ContainsKey(rightRow))
                  {
                      continue;
                  }

                  // This right row never matched a left row: same decision as above,
                  // mirrored for the right side.
                  currentRightRow = rightRow;
                  Row emptyRow = CreateEmptyRow();
                  currentLeftRow = emptyRow;
                  if (MatchJoinCondition(emptyRow, rightRow))
                  {
                      yield return MergeRows(emptyRow, rightRow);
                  }
                  else
                  {
                      RightOrphanRow(rightRow);
                  }
              }
          }

          /// <summary>
          /// Check if the two rows match to the join condition.
          /// </summary>
          /// <remarks>
          /// <see cref="Execute"/> also calls this with a placeholder row on one side
          /// when the other side found no partner.
          /// </remarks>
          /// <param name="leftRow">The left row.</param>
          /// <param name="rightRow">The right row.</param>
          /// <returns><c>true</c> if the pair should be merged and emitted; otherwise <c>false</c>.</returns>
          protected abstract bool MatchJoinCondition(Row leftRow, Row rightRow);

          /// <summary>
          /// Perform an inner join equality on the two objects.
          /// Null values are not considered equal.
          /// A placeholder row on either side never satisfies this join.
          /// </summary>
          /// <param name="left">The left.</param>
          /// <param name="right">The right.</param>
          /// <returns><c>true</c> if neither current row is a placeholder and both values are non-null and equal.</returns>
          protected virtual bool InnerJoin(object left, object right)
          {
              if (IsEmptyRow(currentLeftRow) || IsEmptyRow(currentRightRow))
              {
                  return false;
              }
              return AreEqualAndNotNull(left, right);
          }

          /// <summary>
          /// Perform a left join equality on the two objects.
          /// Null values are not considered equal.
          /// A placeholder row on the right side always satisfies this join.
          /// </summary>
          /// <param name="left">The left.</param>
          /// <param name="right">The right.</param>
          /// <returns><c>true</c> if the current right row is a placeholder, or both values are non-null and equal.</returns>
          protected virtual bool LeftJoin(object left, object right)
          {
              if (IsEmptyRow(currentRightRow))
              {
                  return true;
              }
              return AreEqualAndNotNull(left, right);
          }

          /// <summary>
          /// Perform a right join equality on the two objects.
          /// Null values are not considered equal.
          /// A placeholder row on the left side always satisfies this join.
          /// </summary>
          /// <param name="left">The left.</param>
          /// <param name="right">The right.</param>
          /// <returns><c>true</c> if the current left row is a placeholder, or both values are non-null and equal.</returns>
          protected virtual bool RightJoin(object left, object right)
          {
              if (IsEmptyRow(currentLeftRow))
              {
                  return true;
              }
              return AreEqualAndNotNull(left, right);
          }

          /// <summary>
          /// Perform a full join equality on the two objects.
          /// Null values are not considered equal.
          /// A placeholder row on either side will satisfy this join.
          /// </summary>
          /// <param name="left">The left.</param>
          /// <param name="right">The right.</param>
          /// <returns><c>true</c> if either current row is a placeholder, or both values are non-null and equal.</returns>
          protected virtual bool FullJoin(object left, object right)
          {
              if (IsEmptyRow(currentLeftRow) || IsEmptyRow(currentRightRow))
              {
                  return true;
              }
              return AreEqualAndNotNull(left, right);
          }

          /// <summary>
          /// Creates a placeholder row that stands in for the missing side of a join.
          /// The row contains only the marker column, which <see cref="IsEmptyRow"/> looks for.
          /// </summary>
          /// <returns>A new row carrying the marker column.</returns>
          private static Row CreateEmptyRow()
          {
              Row emptyRow = new Row();
              emptyRow[IsEmptyRowMarker] = IsEmptyRowMarker;
              return emptyRow;
          }

          /// <summary>
          /// Tells whether the given row is a placeholder created by <see cref="CreateEmptyRow"/>.
          /// </summary>
          /// <param name="row">The row to inspect; may be null when no join is in progress.</param>
          /// <returns><c>true</c> if the row contains the marker column; <c>false</c> otherwise or when the row is null.</returns>
          private static bool IsEmptyRow(Row row)
          {
              // The current rows are only assigned while Execute is being enumerated;
              // treat "no row yet" as "not a placeholder" instead of failing.
              return row != null && row.Contains(IsEmptyRowMarker);
          }

          /// <summary>
          /// Shared value comparison for all join helpers: null never equals anything,
          /// not even another null.
          /// </summary>
          /// <param name="left">The left value.</param>
          /// <param name="right">The right value.</param>
          /// <returns><c>true</c> if both values are non-null and <c>left.Equals(right)</c>.</returns>
          private static bool AreEqualAndNotNull(object left, object right)
          {
              if (left == null || right == null)
              {
                  return false;
              }
              return left.Equals(right);
          }
      }
  }