PageRenderTime 882ms CodeModel.GetById 599ms app.highlight 10ms RepoModel.GetById 192ms app.codeStats 0ms

/Rhino.Etl.Core/Operations/JoinOperation.cs

http://github.com/ayende/rhino-etl
C# | 227 lines | 149 code | 19 blank | 59 comment | 11 complexity | 0b1efa7b202b86ee2687079a1614db09 MD5 | raw file
  1namespace Rhino.Etl.Core.Operations
  2{
  3    using Enumerables;
  4    using System;
  5    using System.Collections.Generic;
  6
  7    /// <summary>
  8    /// Perform a join between two sources. The left part of the join is optional and if not specified it will use the current pipeline as input.
  9    /// </summary>
 10    public abstract class JoinOperation : AbstractJoinOperation
 11    {
 12        private JoinType jointype;
 13        private string[] leftColumns;
 14        private string[] rightColumns;
 15        private Dictionary<Row, object> rightRowsWereMatched = new Dictionary<Row, object>();
 16        private Dictionary<ObjectArrayKeys, List<Row>> rightRowsByJoinKey = new Dictionary<ObjectArrayKeys, List<Row>>();
 17
 18        /// <summary>
 19        /// Sets the right part of the join
 20        /// </summary>
 21        /// <value>The right.</value>
 22        public JoinOperation Right(IOperation value)
 23        {
 24            right.Register(value);
 25            return this;
 26        }
 27
 28        /// <summary>
 29        /// Sets the left part of the join
 30        /// </summary>
 31        /// <value>The left.</value>
 32        public JoinOperation Left(IOperation value)
 33        {
 34            left.Register(value);
 35            leftRegistered = true;
 36            return this;
 37        }
 38
 39        /// <summary>
 40        /// Executes this operation
 41        /// </summary>
 42        /// <param name="rows">Rows in pipeline. These are only used if a left part of the join was not specified.</param>
 43        /// <returns></returns>
 44        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
 45        {
 46            PrepareForJoin();
 47
 48            SetupJoinConditions();
 49            Guard.Against(leftColumns == null, "You must setup the left columns");
 50            Guard.Against(rightColumns == null, "You must setup the right columns");
 51
 52            IEnumerable<Row> rightEnumerable = GetRightEnumerable();
 53
 54            IEnumerable<Row> execute = left.Execute(leftRegistered ? null : rows);
 55            foreach (Row leftRow in new EventRaisingEnumerator(left, execute))
 56            {
 57                ObjectArrayKeys key = leftRow.CreateKey(leftColumns);
 58                List<Row> rightRows;
 59                if (this.rightRowsByJoinKey.TryGetValue(key, out rightRows))
 60                {
 61                    foreach (Row rightRow in rightRows)
 62                    {
 63                        rightRowsWereMatched[rightRow] = null;
 64                        yield return MergeRows(leftRow, rightRow);
 65                    }
 66                }
 67                else if ((jointype & JoinType.Left) != 0)
 68                {
 69                    Row emptyRow = new Row();
 70                    yield return MergeRows(leftRow, emptyRow);
 71                }
 72                else
 73                {
 74                    LeftOrphanRow(leftRow);
 75                }
 76            }
 77            foreach (Row rightRow in rightEnumerable)
 78            {
 79                if (rightRowsWereMatched.ContainsKey(rightRow))
 80                    continue;
 81                Row emptyRow = new Row();
 82                if ((jointype & JoinType.Right) != 0)
 83                    yield return MergeRows(emptyRow, rightRow);
 84                else
 85                    RightOrphanRow(rightRow);
 86            }
 87        }
 88
 89        private IEnumerable<Row> GetRightEnumerable()
 90        {
 91            IEnumerable<Row> rightEnumerable = new CachingEnumerable<Row>(
 92                new EventRaisingEnumerator(right, right.Execute(null))
 93                );
 94            foreach (Row row in rightEnumerable)
 95            {
 96                ObjectArrayKeys key = row.CreateKey(rightColumns);
 97                List<Row> rowsForKey;
 98                if (this.rightRowsByJoinKey.TryGetValue(key, out rowsForKey) == false)
 99                {
100                    this.rightRowsByJoinKey[key] = rowsForKey = new List<Row>();
101                }
102                rowsForKey.Add(row);
103            }
104            return rightEnumerable;
105        }
106
107        /// <summary>
108        /// Setups the join conditions.
109        /// </summary>
110        protected abstract void SetupJoinConditions();
111
112        /// <summary>
113        /// Create an inner join
114        /// </summary>
115        /// <value>The inner.</value>
116        protected JoinBuilder InnerJoin
117        {
118            get { return new JoinBuilder(this, JoinType.Inner); }
119        }
120
121        /// <summary>
122        /// Create a left outer join
123        /// </summary>
124        /// <value>The inner.</value>
125        protected JoinBuilder LeftJoin
126        {
127            get { return new JoinBuilder(this, JoinType.Left); }
128        }
129
130        /// <summary>
131        /// Create a right outer join
132        /// </summary>
133        /// <value>The inner.</value>
134        protected JoinBuilder RightJoin
135        {
136            get { return new JoinBuilder(this, JoinType.Right); }
137        }
138
139        /// <summary>
140        /// Create a full outer join
141        /// </summary>
142        /// <value>The inner.</value>
143        protected JoinBuilder FullOuterJoin
144        {
145            get { return new JoinBuilder(this, JoinType.Full); }
146        }
147
148        /// <summary>
149        /// Fluent interface to create joins
150        /// </summary>
151        public class JoinBuilder
152        {
153            private readonly JoinOperation parent;
154
155            /// <summary>
156            /// Initializes a new instance of the <see cref="JoinBuilder"/> class.
157            /// </summary>
158            /// <param name="parent">The parent.</param>
159            /// <param name="joinType">Type of the join.</param>
160            public JoinBuilder(JoinOperation parent, JoinType joinType)
161            {
162                this.parent = parent;
163                parent.jointype = joinType;
164            }
165
166            /// <summary>
167            /// Setup the left side of the join
168            /// </summary>
169            /// <param name="columns">The columns.</param>
170            /// <returns></returns>
171            public JoinBuilder Left(params string[] columns)
172            {
173                parent.leftColumns = columns;
174                return this;
175            }
176
177            /// <summary>
178            /// Setup the right side of the join
179            /// </summary>
180            /// <param name="columns">The columns.</param>
181            /// <returns></returns>
182            public JoinBuilder Right(params string[] columns)
183            {
184                parent.rightColumns = columns;
185                return this;
186            }
187        }
188
189        ///    <summary>
190        ///    Occurs when    a row is processed.
191        ///    </summary>
192        public override event Action<IOperation, Row> OnRowProcessed
193        {
194            add
195            {
196                foreach (IOperation operation in new[] { left, right })
197                    operation.OnRowProcessed += value;
198                base.OnRowProcessed += value;
199            }
200            remove
201            {
202                foreach (IOperation operation in new[] { left, right })
203                    operation.OnRowProcessed -= value;
204                base.OnRowProcessed -= value;
205            }
206        }
207
208        ///    <summary>
209        ///    Occurs when    all    the    rows has finished processing.
210        ///    </summary>
211        public override event Action<IOperation> OnFinishedProcessing
212        {
213            add
214            {
215                foreach (IOperation operation in new[] { left, right })
216                    operation.OnFinishedProcessing += value;
217                base.OnFinishedProcessing += value;
218            }
219            remove
220            {
221                foreach (IOperation operation in new[] { left, right })
222                    operation.OnFinishedProcessing -= value;
223                base.OnFinishedProcessing -= value;
224            }
225        }
226    }
227}