/Code/Source/Teaching.Core/Algorithms/MapReduce.cs
C# | 127 lines | 106 code | 21 blank | 0 comment | 10 complexity | 44c50d03443b8f32930f9c9b23049a27 MD5 | raw file
- using System;
- using System.Collections.Generic;
- using System.Diagnostics.Contracts;
- using System.Linq;
-
namespace Teaching.Core.Algorithms
{
    /// <summary>
    /// Selects the strategy <see cref="MapReduce"/> uses to run a job.
    /// </summary>
    public enum MapReduceExecutionMode
    {
        /// <summary>Runs the map and reduce phases sequentially while the result is enumerated.</summary>
        SingleThreaded,

        /// <summary>Reserved for a straightforward parallel strategy; not implemented yet, runs sequentially.</summary>
        MultiThreadedNaive,

        /// <summary>Reserved for a tuned parallel strategy; not implemented yet, runs sequentially.</summary>
        MultiThreadedOptimized,
    }

    /// <summary>
    /// A small in-process map/reduce implementation exposed as extension methods on sequences.
    /// </summary>
    public static class MapReduce
    {
        /// <summary>
        /// Maximum number of items with the same key handed to the reduce function in one call.
        /// </summary>
        private const int ReduceBatchSize = 8;

        /// <summary>
        /// A fixed group of elements that share one key.
        /// </summary>
        internal class BatchedGrouping<TKey, TElement> : IGrouping<TKey, TElement>
        {
            private readonly TKey key;
            private readonly IEnumerable<TElement> elements;

            internal BatchedGrouping(TKey key, IEnumerable<TElement> elements)
            {
                this.key = key;
                this.elements = elements;
            }

            /// <summary>Gets the key shared by every element of the group.</summary>
            public TKey Key
            {
                get { return key; }
            }

            /// <summary>Returns an enumerator over the elements of the group.</summary>
            public IEnumerator<TElement> GetEnumerator()
            {
                return elements.GetEnumerator();
            }

            System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
            {
                return GetEnumerator();
            }
        }

        /// <summary>
        /// Runs a map/reduce job in which every mapped item acts as its own grouping key.
        /// </summary>
        /// <param name="input">The items to feed into <paramref name="mapFunction"/>.</param>
        /// <param name="mapFunction">Produces zero or more intermediate items per input item.</param>
        /// <param name="reduceFunction">Combines several items that compare equal into one item.</param>
        /// <param name="keyComparer">Decides which items belong together; the default comparer is used when null.</param>
        /// <param name="executionMode">Requested execution strategy.</param>
        /// <returns>A lazily evaluated sequence with one fully reduced item per distinct key.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="input"/>, <paramref name="mapFunction"/> or <paramref name="reduceFunction"/> is null.
        /// </exception>
        public static IEnumerable<TReduceOutput> Execute<TMapInput, TReduceOutput>(
            this IEnumerable<TMapInput> input,
            Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
            Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
            IEqualityComparer<TReduceOutput> keyComparer = null,
            MapReduceExecutionMode executionMode = MapReduceExecutionMode.SingleThreaded)
        {
            return Execute<TMapInput, TReduceOutput, TReduceOutput>(
                input, mapFunction, reduceFunction, r => r, keyComparer, executionMode);
        }

        /// <summary>
        /// Runs a map/reduce job: maps every input item, groups the mapped items by key and
        /// repeatedly reduces each group until a single item per key remains.
        /// </summary>
        /// <param name="input">The items to feed into <paramref name="mapFunction"/>.</param>
        /// <param name="mapFunction">Produces zero or more intermediate items per input item.</param>
        /// <param name="reduceFunction">
        /// Combines several items with the same key into one item. It is applied to partial
        /// batches and again to its own results, so it is expected to be associative and to
        /// return an item that maps to the same key as its inputs.
        /// </param>
        /// <param name="keySelector">Extracts the grouping key from an item.</param>
        /// <param name="keyComparer">Compares keys; the default comparer is used when null.</param>
        /// <param name="executionMode">
        /// Requested execution strategy. Only <see cref="MapReduceExecutionMode.SingleThreaded"/> is
        /// implemented; the multi-threaded modes currently run sequentially as well.
        /// </param>
        /// <returns>A lazily evaluated sequence with one fully reduced item per distinct key.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="input"/>, <paramref name="mapFunction"/>, <paramref name="reduceFunction"/>
        /// or <paramref name="keySelector"/> is null.
        /// </exception>
        public static IEnumerable<TReduceOutput> Execute<TMapInput, TReduceOutput, TReduceKey>(
            this IEnumerable<TMapInput> input,
            Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
            Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
            Func<TReduceOutput, TReduceKey> keySelector,
            IEqualityComparer<TReduceKey> keyComparer = null,
            MapReduceExecutionMode executionMode = MapReduceExecutionMode.SingleThreaded)
        {
            // Explicit guards instead of Contract.Requires<T>: they throw the same exception type
            // but do not depend on the Code Contracts binary rewriter being part of the build.
            // This method is not an iterator, so the checks run at call time, not on enumeration.
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            if (mapFunction == null)
            {
                throw new ArgumentNullException("mapFunction");
            }

            if (reduceFunction == null)
            {
                throw new ArgumentNullException("reduceFunction");
            }

            if (keySelector == null)
            {
                throw new ArgumentNullException("keySelector");
            }

            keyComparer = keyComparer ?? EqualityComparer<TReduceKey>.Default;

            switch (executionMode)
            {
                // The parallel strategies are not implemented yet. Running the sequential
                // algorithm yields the same results and avoids handing callers a null sequence.
                case MapReduceExecutionMode.MultiThreadedNaive:
                case MapReduceExecutionMode.MultiThreadedOptimized:
                default:
                    return ExecuteSingleThreaded(input, mapFunction, reduceFunction, keySelector, keyComparer);
            }
        }

        /// <summary>
        /// Sequential implementation. Works in cycles: each cycle groups the pending items into
        /// batches per key, reduces every batch with more than one item and feeds the results
        /// into the next cycle. An item is emitted once it is the only one left for its key.
        /// </summary>
        private static IEnumerable<TReduceOutput> ExecuteSingleThreaded<TMapInput, TReduceOutput, TReduceKey>(
            IEnumerable<TMapInput> input,
            Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
            Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
            Func<TReduceOutput, TReduceKey> keySelector,
            IEqualityComparer<TReduceKey> keyComparer)
        {
            IEnumerable<TReduceOutput> pendingItems = input.SelectMany(mapFunction);
            bool hasPendingItems = true;

            while (hasPendingItems)
            {
                var itemsForNextCycle = new List<TReduceOutput>();
                var keysReducedThisCycle = new HashSet<TReduceKey>(keyComparer);

                // The pending sequence is enumerated exactly once per cycle, so the input is
                // read once and mapFunction runs once per input item.
                foreach (var reduceGroup in GroupByBatched(pendingItems, keySelector, keyComparer, ReduceBatchSize))
                {
                    bool hasSeveralItems = reduceGroup.Skip(1).Any();

                    if (hasSeveralItems)
                    {
                        itemsForNextCycle.Add(reduceFunction(reduceGroup));
                        keysReducedThisCycle.Add(reduceGroup.Key);
                    }
                    else if (keysReducedThisCycle.Contains(reduceGroup.Key))
                    {
                        // A single left-over item whose key already produced a reduced batch in
                        // this cycle is not final yet: it still has to be merged with that
                        // result. Single-item groups are only produced by the final flush of
                        // GroupByBatched, i.e. after every full batch of the cycle.
                        itemsForNextCycle.Add(reduceGroup.First());
                    }
                    else
                    {
                        yield return reduceGroup.First();
                    }
                }

                pendingItems = itemsForNextCycle;
                hasPendingItems = itemsForNextCycle.Count > 0;
            }
        }

        /// <summary>
        /// Lazily groups <paramref name="elements"/> by key. A group is emitted as soon as
        /// <paramref name="batchSize"/> elements with the same key have been collected; once the
        /// source is exhausted the remaining partial groups are emitted.
        /// </summary>
        private static IEnumerable<IGrouping<TKey, TElement>> GroupByBatched<TKey, TElement>(
            IEnumerable<TElement> elements,
            Func<TElement, TKey> keySelector,
            IEqualityComparer<TKey> keyComparer,
            int batchSize)
        {
            var itemBuffer = new Dictionary<TKey, List<TElement>>(keyComparer);

            foreach (var element in elements)
            {
                var key = keySelector(element);

                List<TElement> groupBuffer;
                if (!itemBuffer.TryGetValue(key, out groupBuffer))
                {
                    groupBuffer = new List<TElement>(batchSize);
                    itemBuffer.Add(key, groupBuffer);
                }

                groupBuffer.Add(element);

                if (groupBuffer.Count == batchSize)
                {
                    // Hand out a copy so the buffer can be reused for the next batch of this key.
                    yield return new BatchedGrouping<TKey, TElement>(key, groupBuffer.ToArray());
                    groupBuffer.Clear();
                }
            }

            foreach (var kv in itemBuffer)
            {
                if (kv.Value.Count > 0)
                {
                    yield return new BatchedGrouping<TKey, TElement>(kv.Key, kv.Value.ToArray());
                }
            }
        }
    }
}