/Rhino.Etl.Core/Operations/JoinOperation.cs

http://github.com/ayende/rhino-etl · C# · 227 lines · 149 code · 19 blank · 59 comment · 11 complexity · 0b1efa7b202b86ee2687079a1614db09 MD5 · raw file

  1. namespace Rhino.Etl.Core.Operations
  2. {
  3. using Enumerables;
  4. using System;
  5. using System.Collections.Generic;
  /// <summary>
  /// Perform a join between two sources. The left part of the join is optional and if not
  /// specified it will use the current pipeline as input.
  /// </summary>
  /// <remarks>
  /// Derived classes describe the join in <see cref="SetupJoinConditions"/> by using one of the
  /// <see cref="InnerJoin"/>, <see cref="LeftJoin"/>, <see cref="RightJoin"/> or
  /// <see cref="FullOuterJoin"/> builders to pick the join type and the key columns.
  /// </remarks>
  public abstract class JoinOperation : AbstractJoinOperation
  {
      // Join type selected through a JoinBuilder; tested bitwise against JoinType.Left / JoinType.Right.
      private JoinType jointype;

      // Key column names for each side; both must be set by SetupJoinConditions before rows are joined.
      private string[] leftColumns;
      private string[] rightColumns;

      // Right rows that were paired with at least one left row during the current execution.
      private readonly HashSet<Row> rightRowsWereMatched = new HashSet<Row>();

      // Right rows grouped by their join key, rebuilt on every execution.
      private readonly Dictionary<ObjectArrayKeys, List<Row>> rightRowsByJoinKey =
          new Dictionary<ObjectArrayKeys, List<Row>>();

      /// <summary>
      /// Sets the right part of the join
      /// </summary>
      /// <param name="value">The operation that supplies the rows for the right side.</param>
      /// <returns>This instance, so calls can be chained.</returns>
      public JoinOperation Right(IOperation value)
      {
          right.Register(value);
          return this;
      }

      /// <summary>
      /// Sets the left part of the join
      /// </summary>
      /// <param name="value">The operation that supplies the rows for the left side.</param>
      /// <returns>This instance, so calls can be chained.</returns>
      public JoinOperation Left(IOperation value)
      {
          left.Register(value);
          // Remember that an explicit left side exists so Execute ignores the pipeline rows.
          leftRegistered = true;
          return this;
      }

      /// <summary>
      /// Executes this operation
      /// </summary>
      /// <remarks>
      /// This method is an iterator: the preparation steps and the column checks only run once
      /// the returned sequence is enumerated, and they run again on every new enumeration.
      /// </remarks>
      /// <param name="rows">Rows in pipeline. These are only used if a left part of the join was not specified.</param>
      /// <returns>
      /// The merged rows: one per matching left/right pair, plus unmatched left rows when the join
      /// type includes <see cref="JoinType.Left"/> and unmatched right rows when it includes
      /// <see cref="JoinType.Right"/>.
      /// </returns>
      public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
      {
          PrepareForJoin();

          SetupJoinConditions();
          Guard.Against(leftColumns == null, "You must setup the left columns");
          Guard.Against(rightColumns == null, "You must setup the right columns");

          // Consumes the whole right side up front and indexes it by join key.
          IEnumerable<Row> rightEnumerable = GetRightEnumerable();

          IEnumerable<Row> execute = left.Execute(leftRegistered ? null : rows);
          foreach (Row leftRow in new EventRaisingEnumerator(left, execute))
          {
              ObjectArrayKeys key = leftRow.CreateKey(leftColumns);
              List<Row> rightRows;
              if (rightRowsByJoinKey.TryGetValue(key, out rightRows))
              {
                  foreach (Row rightRow in rightRows)
                  {
                      // Track the match so the row is not treated as an orphan in the second pass.
                      rightRowsWereMatched.Add(rightRow);
                      yield return MergeRows(leftRow, rightRow);
                  }
              }
              else if ((jointype & JoinType.Left) != 0)
              {
                  // Left/full join: keep the unmatched left row, merged with an empty right row.
                  yield return MergeRows(leftRow, new Row());
              }
              else
              {
                  LeftOrphanRow(leftRow);
              }
          }

          // Second pass over the right side to deal with rows no left row matched.
          foreach (Row rightRow in rightEnumerable)
          {
              if (rightRowsWereMatched.Contains(rightRow))
              {
                  continue;
              }

              if ((jointype & JoinType.Right) != 0)
              {
                  // Right/full join: keep the unmatched right row, merged with an empty left row.
                  yield return MergeRows(new Row(), rightRow);
              }
              else
              {
                  RightOrphanRow(rightRow);
              }
          }
      }

      /// <summary>
      /// Executes the right side of the join and indexes its rows by join key.
      /// </summary>
      /// <returns>
      /// The right-side sequence, wrapped in a <c>CachingEnumerable</c> so that it can be iterated
      /// again after indexing (presumably without re-executing the right operation).
      /// </returns>
      private IEnumerable<Row> GetRightEnumerable()
      {
          // Start every execution from a clean slate; otherwise enumerating the join result a
          // second time would index the right rows again on top of the previous run, producing
          // duplicate matches and stale "matched" markers.
          rightRowsByJoinKey.Clear();
          rightRowsWereMatched.Clear();

          IEnumerable<Row> rightEnumerable = new CachingEnumerable<Row>(
              new EventRaisingEnumerator(right, right.Execute(null))
              );
          foreach (Row row in rightEnumerable)
          {
              ObjectArrayKeys key = row.CreateKey(rightColumns);
              List<Row> rowsForKey;
              if (rightRowsByJoinKey.TryGetValue(key, out rowsForKey) == false)
              {
                  rowsForKey = new List<Row>();
                  rightRowsByJoinKey[key] = rowsForKey;
              }
              rowsForKey.Add(row);
          }
          return rightEnumerable;
      }

      /// <summary>
      /// Sets up the join conditions. Implementations are expected to choose the join type and
      /// the left and right key columns, typically through one of the join builder properties.
      /// </summary>
      protected abstract void SetupJoinConditions();

      /// <summary>
      /// Create an inner join
      /// </summary>
      /// <value>A builder for the key columns. Reading this property sets the join type to <see cref="JoinType.Inner"/>.</value>
      protected JoinBuilder InnerJoin
      {
          get { return new JoinBuilder(this, JoinType.Inner); }
      }

      /// <summary>
      /// Create a left outer join
      /// </summary>
      /// <value>A builder for the key columns. Reading this property sets the join type to <see cref="JoinType.Left"/>.</value>
      protected JoinBuilder LeftJoin
      {
          get { return new JoinBuilder(this, JoinType.Left); }
      }

      /// <summary>
      /// Create a right outer join
      /// </summary>
      /// <value>A builder for the key columns. Reading this property sets the join type to <see cref="JoinType.Right"/>.</value>
      protected JoinBuilder RightJoin
      {
          get { return new JoinBuilder(this, JoinType.Right); }
      }

      /// <summary>
      /// Create a full outer join
      /// </summary>
      /// <value>A builder for the key columns. Reading this property sets the join type to <see cref="JoinType.Full"/>.</value>
      protected JoinBuilder FullOuterJoin
      {
          get { return new JoinBuilder(this, JoinType.Full); }
      }

      /// <summary>
      /// Fluent interface to create joins
      /// </summary>
      public class JoinBuilder
      {
          private readonly JoinOperation parent;

          /// <summary>
          /// Initializes a new instance of the <see cref="JoinBuilder"/> class and stores the
          /// join type on the parent operation.
          /// </summary>
          /// <param name="parent">The join operation being configured.</param>
          /// <param name="joinType">Type of the join.</param>
          public JoinBuilder(JoinOperation parent, JoinType joinType)
          {
              this.parent = parent;
              parent.jointype = joinType;
          }

          /// <summary>
          /// Setup the left side of the join
          /// </summary>
          /// <param name="columns">The left-side columns that make up the join key.</param>
          /// <returns>This builder, so calls can be chained.</returns>
          public JoinBuilder Left(params string[] columns)
          {
              parent.leftColumns = columns;
              return this;
          }

          /// <summary>
          /// Setup the right side of the join
          /// </summary>
          /// <param name="columns">The right-side columns that make up the join key.</param>
          /// <returns>This builder, so calls can be chained.</returns>
          public JoinBuilder Right(params string[] columns)
          {
              parent.rightColumns = columns;
              return this;
          }
      }

      /// <summary>
      /// Occurs when a row is processed. Handlers are also attached to (and detached from)
      /// the left and right parts of the join.
      /// </summary>
      public override event Action<IOperation, Row> OnRowProcessed
      {
          add
          {
              foreach (IOperation operation in new[] { left, right })
              {
                  operation.OnRowProcessed += value;
              }
              base.OnRowProcessed += value;
          }
          remove
          {
              foreach (IOperation operation in new[] { left, right })
              {
                  operation.OnRowProcessed -= value;
              }
              base.OnRowProcessed -= value;
          }
      }

      /// <summary>
      /// Occurs when all the rows have finished processing. Handlers are also attached to
      /// (and detached from) the left and right parts of the join.
      /// </summary>
      public override event Action<IOperation> OnFinishedProcessing
      {
          add
          {
              foreach (IOperation operation in new[] { left, right })
              {
                  operation.OnFinishedProcessing += value;
              }
              base.OnFinishedProcessing += value;
          }
          remove
          {
              foreach (IOperation operation in new[] { left, right })
              {
                  operation.OnFinishedProcessing -= value;
              }
              base.OnFinishedProcessing -= value;
          }
      }
  }
  208. }