// Scraped code-viewer header (not part of the original source file):
// PageRenderTime 344ms | CodeModel.GetById 161ms | app.highlight 7ms | RepoModel.GetById 174ms | app.codeStats 0ms
//
// File: /Rhino.Etl.Core/Operations/NestedLoopsJoinOperation.cs
// Repository: http://github.com/ayende/rhino-etl
// C# | 173 lines | 106 code | 12 blank | 55 comment | 27 complexity | ff17406bf2ac41cccd1302f84732817a MD5 | raw file
  1namespace Rhino.Etl.Core.Operations
  2{
  3    using Enumerables;
  4    using System;
  5    using System.Collections.Generic;
  6
  7    /// <summary>
  8    /// Perform a join between two sources. The left part of the join is optional and if not specified it will use the current pipeline as input.
  9    /// </summary>
 10    public abstract class NestedLoopsJoinOperation : AbstractJoinOperation
 11    {
 12        private static readonly string IsEmptyRowMarker = Guid.NewGuid().ToString();
 13
 14        private Row currentRightRow, currentLeftRow;
 15
 16        /// <summary>
 17        /// Sets the right part of the join
 18        /// </summary>
 19        /// <value>The right.</value>
 20        public NestedLoopsJoinOperation Right(IOperation value)
 21        {
 22            right.Register(value);
 23            return this;
 24        }
 25
 26        /// <summary>
 27        /// Sets the left part of the join
 28        /// </summary>
 29        /// <value>The left.</value>
 30        public NestedLoopsJoinOperation Left(IOperation value)
 31        {
 32            left.Register(value);
 33            leftRegistered = true;
 34            return this;
 35        }
 36
 37        /// <summary>
 38        /// Executes this operation
 39        /// </summary>
 40        /// <param name="rows">Rows in pipeline. These are only used if a left part of the join was not specified.</param>
 41        /// <returns></returns>
 42        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
 43        {
 44            PrepareForJoin();
 45
 46            Dictionary<Row, object> matchedRightRows = new Dictionary<Row, object>();
 47            CachingEnumerable<Row> rightEnumerable = new CachingEnumerable<Row>(
 48                new EventRaisingEnumerator(right, right.Execute(null))
 49                );
 50            IEnumerable<Row> execute = left.Execute(leftRegistered ? null : rows);
 51            foreach (Row leftRow in new EventRaisingEnumerator(left, execute))
 52            {
 53                bool leftNeedOuterJoin = true;
 54                currentLeftRow = leftRow;
 55                foreach (Row rightRow in rightEnumerable)
 56                {
 57                    currentRightRow = rightRow;
 58                    if (MatchJoinCondition(leftRow, rightRow))
 59                    {
 60                        leftNeedOuterJoin = false;
 61                        matchedRightRows[rightRow] = null;
 62                        yield return MergeRows(leftRow, rightRow);
 63                    }
 64                }
 65                if (leftNeedOuterJoin)
 66                {
 67                    Row emptyRow = new Row();
 68                    emptyRow[IsEmptyRowMarker] = IsEmptyRowMarker;
 69                    currentRightRow = emptyRow;
 70                    if (MatchJoinCondition(leftRow, emptyRow))
 71                        yield return MergeRows(leftRow, emptyRow);
 72                    else
 73                        LeftOrphanRow(leftRow);
 74                }
 75            }
 76            foreach (Row rightRow in rightEnumerable)
 77            {
 78                if (matchedRightRows.ContainsKey(rightRow))
 79                    continue;
 80                currentRightRow = rightRow;
 81                Row emptyRow = new Row();
 82                emptyRow[IsEmptyRowMarker] = IsEmptyRowMarker;
 83                currentLeftRow = emptyRow;
 84                if (MatchJoinCondition(emptyRow, rightRow))
 85                    yield return MergeRows(emptyRow, rightRow);
 86                else
 87                    RightOrphanRow(rightRow);
 88            }
 89        }
 90
 91        /// <summary>
 92        /// Check if the two rows match to the join condition.
 93        /// </summary>
 94        /// <param name="leftRow">The left row.</param>
 95        /// <param name="rightRow">The right row.</param>
 96        /// <returns></returns>
 97        protected abstract bool MatchJoinCondition(Row leftRow, Row rightRow);
 98
 99        /// <summary>
100        /// Perform an inner join equality on the two objects.
101        /// Null values are not considered equal
102        /// </summary>
103        /// <param name="left">The left.</param>
104        /// <param name="right">The right.</param>
105        /// <returns></returns>
106        protected virtual bool InnerJoin(object left, object right)
107        {
108            if (IsEmptyRow(currentLeftRow) || IsEmptyRow(currentRightRow))
109                return false;
110            if (left == null || right == null)
111                return false;
112            return left.Equals(right);
113        }
114
115        private static bool IsEmptyRow(Row row)
116        {
117            return row.Contains(IsEmptyRowMarker);
118        }
119
120        /// <summary>
121        /// Perform an left join equality on the two objects.
122        /// Null values are not considered equal
123        /// An empty row on the right side
124        /// with a value on the left is considered equal
125        /// </summary>
126        /// <param name="left">The left.</param>
127        /// <param name="right">The right.</param>
128        /// <returns></returns>
129        protected virtual bool LeftJoin(object left, object right)
130        {
131            if (IsEmptyRow(currentRightRow))
132                return true;
133            if (left == null || right == null)
134                return false;
135            return left.Equals(right);
136        }
137
138        /// <summary>
139        /// Perform an right join equality on the two objects.
140        /// Null values are not considered equal
141        /// An empty row on the left side
142        /// with a value on the right is considered equal
143        /// </summary>
144        /// <param name="left">The left.</param>
145        /// <param name="right">The right.</param>
146        /// <returns></returns>
147        protected virtual bool RightJoin(object left, object right)
148        {
149            if (IsEmptyRow(currentLeftRow))
150                return true;
151            if (left == null || right == null)
152                return false;
153            return left.Equals(right);
154        }
155
156        /// <summary>
157        /// Perform an full join equality on the two objects.
158        /// Null values are not considered equal
159        /// An empty row on either side will satisfy this join
160        /// </summary>
161        /// <param name="left">The left.</param>
162        /// <param name="right">The right.</param>
163        /// <returns></returns>
164        protected virtual bool FullJoin(object left, object right)
165        {
166            if (IsEmptyRow(currentLeftRow) || IsEmptyRow(currentRightRow))
167                return true;
168            if (left == null || right == null)
169                return false;
170            return Equals(left, right);
171        }
172    }
173}