PageRenderTime 49ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/Code/Source/Teaching.Core/Algorithms/MapReduce.cs

https://bitbucket.org/BernhardGlueck/teaching
C# | 127 lines | 106 code | 21 blank | 0 comment | 10 complexity | 44c50d03443b8f32930f9c9b23049a27 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics.Contracts;
  4. using System.Linq;
  namespace Teaching.Core.Algorithms
  {
      /// <summary>
      /// Execution strategies that can be requested from <see cref="MapReduce"/>.
      /// </summary>
      public enum MapReduceExecutionMode
      {
          /// <summary>Map, group and reduce sequentially while the result is enumerated.</summary>
          SingleThreaded,

          /// <summary>Reserved. No dedicated implementation exists yet; the single-threaded pipeline is used.</summary>
          MultiThreadedNaive,

          /// <summary>Reserved. No dedicated implementation exists yet; the single-threaded pipeline is used.</summary>
          MultiThreadedOptimized,
      }

      /// <summary>
      /// A small in-process map/reduce: every input is mapped to zero or more items, the items are
      /// grouped by key, and each group is folded by the reduce function until exactly one item per
      /// key remains.
      /// </summary>
      public static class MapReduce
      {
          /// <summary>Maximum number of items handed to the reduce function in a single call.</summary>
          private const int ReduceBatchSize = 8;

          /// <summary>
          /// An <see cref="IGrouping{TKey,TElement}"/> over a pre-collected batch of elements sharing one key.
          /// </summary>
          internal class BatchedGrouping<TKey, TElement> : IGrouping<TKey, TElement>
          {
              private readonly TKey key;
              private readonly IEnumerable<TElement> elements;

              /// <summary>The key shared by every element of this batch.</summary>
              public TKey Key
              {
                  get { return key; }
              }

              /// <summary>Enumerates the elements of the batch.</summary>
              public IEnumerator<TElement> GetEnumerator()
              {
                  return elements.GetEnumerator();
              }

              System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
              {
                  return GetEnumerator();
              }

              /// <summary>Wraps <paramref name="elements"/> as a grouping identified by <paramref name="key"/>.</summary>
              internal BatchedGrouping(TKey key, IEnumerable<TElement> elements)
              {
                  this.key = key;
                  this.elements = elements;
              }
          }

          /// <summary>
          /// Runs a map/reduce job in which every mapped item acts as its own grouping key.
          /// </summary>
          /// <param name="input">The items to map.</param>
          /// <param name="mapFunction">Produces zero or more items for one input item.</param>
          /// <param name="reduceFunction">Folds a batch of items that compare equal into a single item.</param>
          /// <param name="keyComparer">Decides which items belong together; the default comparer is used when null.</param>
          /// <param name="executionMode">Requested execution strategy.</param>
          /// <returns>A lazily evaluated sequence containing one item per distinct key.</returns>
          /// <exception cref="ArgumentNullException">A required argument is null.</exception>
          public static IEnumerable<TReduceOutput> Execute<TMapInput, TReduceOutput>(
              this IEnumerable<TMapInput> input,
              Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
              Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
              IEqualityComparer<TReduceOutput> keyComparer = null,
              MapReduceExecutionMode executionMode = MapReduceExecutionMode.SingleThreaded)
          {
              // Argument validation happens in the keyed overload this call forwards to.
              return Execute(input, mapFunction, reduceFunction, r => r, keyComparer, executionMode);
          }

          /// <summary>
          /// Runs a map/reduce job, grouping the mapped items by the key returned from
          /// <paramref name="keySelector"/>.
          /// </summary>
          /// <param name="input">The items to map.</param>
          /// <param name="mapFunction">Produces zero or more items for one input item.</param>
          /// <param name="reduceFunction">
          /// Folds a batch of items sharing a key into a single item. It may be invoked several times
          /// per key, including on its own earlier results.
          /// </param>
          /// <param name="keySelector">Extracts the grouping key from a mapped or reduced item.</param>
          /// <param name="keyComparer">Compares keys; the default comparer is used when null.</param>
          /// <param name="executionMode">
          /// Requested execution strategy. The multi-threaded modes are not implemented yet and run
          /// the single-threaded pipeline.
          /// </param>
          /// <returns>A lazily evaluated sequence containing one item per distinct key; never null.</returns>
          /// <exception cref="ArgumentNullException">
          /// <paramref name="input"/>, <paramref name="mapFunction"/>, <paramref name="reduceFunction"/>
          /// or <paramref name="keySelector"/> is null.
          /// </exception>
          public static IEnumerable<TReduceOutput> Execute<TMapInput, TReduceOutput, TReduceKey>(
              this IEnumerable<TMapInput> input,
              Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
              Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
              Func<TReduceOutput, TReduceKey> keySelector,
              IEqualityComparer<TReduceKey> keyComparer = null,
              MapReduceExecutionMode executionMode = MapReduceExecutionMode.SingleThreaded)
          {
              // Explicit checks instead of Contract.Requires<TException>: the latter only works when
              // the assembly is post-processed by the Code Contracts rewriter and fails otherwise.
              // This method is not an iterator, so the checks run eagerly at call time.
              if (input == null)
              {
                  throw new ArgumentNullException("input");
              }
              if (mapFunction == null)
              {
                  throw new ArgumentNullException("mapFunction");
              }
              if (reduceFunction == null)
              {
                  throw new ArgumentNullException("reduceFunction");
              }
              if (keySelector == null)
              {
                  throw new ArgumentNullException("keySelector");
              }

              keyComparer = keyComparer ?? EqualityComparer<TReduceKey>.Default;

              switch (executionMode)
              {
                  case MapReduceExecutionMode.MultiThreadedNaive:
                  case MapReduceExecutionMode.MultiThreadedOptimized:
                      // No parallel implementation exists yet. Fall back to the sequential pipeline
                      // so callers get a usable result instead of a null sequence.
                  default:
                      return ExecuteSingleThreaded(input, mapFunction, reduceFunction, keySelector, keyComparer);
              }
          }

          /// <summary>
          /// Sequential pipeline: maps the input once, then repeatedly groups and reduces in cycles
          /// until every key is represented by a single item, which is then yielded.
          /// </summary>
          private static IEnumerable<TReduceOutput> ExecuteSingleThreaded<TMapInput, TReduceOutput, TReduceKey>(
              IEnumerable<TMapInput> input,
              Func<TMapInput, IEnumerable<TReduceOutput>> mapFunction,
              Func<IEnumerable<TReduceOutput>, TReduceOutput> reduceFunction,
              Func<TReduceOutput, TReduceKey> keySelector,
              IEqualityComparer<TReduceKey> keyComparer)
          {
              IEnumerable<TReduceOutput> pending = input.SelectMany(mapFunction);
              bool hasPending = true;

              while (hasPending)
              {
                  var itemsForNextCycle = new List<TReduceOutput>();
                  var keysForNextCycle = new HashSet<TReduceKey>(keyComparer);
                  var unpairedItems = new List<TReduceOutput>();

                  // Enumerate the grouping exactly once per cycle. Probing it first (e.g. with Any())
                  // would restart the lazy pipeline and run mapFunction again for the same inputs.
                  foreach (var reduceGroup in GroupByBatched(pending, keySelector, keyComparer, ReduceBatchSize))
                  {
                      if (reduceGroup.Count() > 1)
                      {
                          var reducedItem = reduceFunction(reduceGroup);
                          itemsForNextCycle.Add(reducedItem);
                          keysForNextCycle.Add(keySelector(reducedItem));
                      }
                      else
                      {
                          unpairedItems.Add(reduceGroup.First());
                      }
                  }

                  // A lone item is only final when nothing else with the same key is still waiting
                  // to be reduced. Otherwise (e.g. 9 items for one key: a batch of 8 plus a remainder
                  // of 1) it must join the next cycle, or the key would be emitted twice.
                  foreach (var unpairedItem in unpairedItems)
                  {
                      if (keysForNextCycle.Contains(keySelector(unpairedItem)))
                      {
                          itemsForNextCycle.Add(unpairedItem);
                      }
                      else
                      {
                          yield return unpairedItem;
                      }
                  }

                  pending = itemsForNextCycle;
                  hasPending = itemsForNextCycle.Count > 0;
              }
          }

          /// <summary>
          /// Lazily groups <paramref name="elements"/> by key into batches of at most
          /// <paramref name="batchSize"/> items. A batch is emitted as soon as it is full; the
          /// partially filled batches (at most one per key) are emitted after the source is exhausted.
          /// </summary>
          private static IEnumerable<IGrouping<TKey, TElement>> GroupByBatched<TKey, TElement>(
              IEnumerable<TElement> elements,
              Func<TElement, TKey> keySelector,
              IEqualityComparer<TKey> keyComparer,
              int batchSize)
          {
              var itemBuffer = new Dictionary<TKey, List<TElement>>(keyComparer);

              foreach (var element in elements)
              {
                  var key = keySelector(element);

                  List<TElement> groupBuffer;
                  if (!itemBuffer.TryGetValue(key, out groupBuffer))
                  {
                      groupBuffer = new List<TElement>(batchSize);
                      itemBuffer.Add(key, groupBuffer);
                  }

                  groupBuffer.Add(element);

                  if (groupBuffer.Count == batchSize)
                  {
                      // Snapshot the batch before clearing: the buffer is reused for the next batch.
                      yield return new BatchedGrouping<TKey, TElement>(key, groupBuffer.ToArray());
                      groupBuffer.Clear();
                  }
              }

              foreach (var kv in itemBuffer)
              {
                  if (kv.Value.Count > 0)
                  {
                      yield return new BatchedGrouping<TKey, TElement>(kv.Key, kv.Value.ToArray());
                  }
              }
          }
      }
  }