/Samples/Source/MapReduce/MapReduceService/MapReduceJob.cs

https://github.com/DIanbi/lokad-cloud · C# · 131 lines · 79 code · 19 blank · 33 comment · 12 complexity · 0df322799b73b2a3540140eea6789616 MD5 · raw file

  1. #region Copyright (c) Lokad 2009-2011
  2. // This code is released under the terms of the new BSD licence.
  3. // URL: http://www.lokad.com/
  4. #endregion
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Linq;
  8. using Lokad.Cloud.Storage;
  9. namespace Lokad.Cloud.Samples.MapReduce
  10. {
  /// <summary>Entry point for setting up and consuming a map/reduce service.</summary>
  /// <typeparam name="TMapIn">The type of the items that are input in the map operation.</typeparam>
  /// <typeparam name="TMapOut">The type of the items that are output from the map operation
  /// (also the type returned by <see cref="GetResult"/>).</typeparam>
  /// <remarks>Every public member runs under one private, per-instance lock, so calls made on
  /// the same instance from several threads are serialized.</remarks>
  /// <seealso cref="MapReduceBlobSet"/>
  /// <seealso cref="MapReduceService"/>
  public sealed class MapReduceJob<TMapIn, TMapOut>
  {
      // HACK: thread-safety is achieved via locks. It would be better to make this class immutable.

      // Dedicated lock target. The job name must NOT be used for locking: it may be a
      // caller-supplied (and possibly interned) string, in which case unrelated code
      // holding the same string reference would contend for the same monitor.
      private readonly object _syncRoot = new object();

      private readonly string _jobName;
      private readonly IBlobStorageProvider _blobStorage;
      private readonly IQueueStorageProvider _queueStorage;

      // True once a batch has been pushed, or when the instance was created for an
      // already existing job ID. Only read and written while holding _syncRoot.
      private bool _itemsPushed;

      /// <summary>Initializes a new instance of the
      /// <see cref="MapReduceJob{TMapIn,TMapOut}"/> generic class for a brand new job,
      /// identified by a freshly generated GUID.</summary>
      /// <param name="blobStorage">The blob storage provider.</param>
      /// <param name="queueStorage">The queue storage provider.</param>
      /// <exception cref="ArgumentNullException">If <paramref name="blobStorage"/> or
      /// <paramref name="queueStorage"/> is <c>null</c>.</exception>
      public MapReduceJob(IBlobStorageProvider blobStorage, IQueueStorageProvider queueStorage)
      {
          if(null == blobStorage) throw new ArgumentNullException("blobStorage");
          if(null == queueStorage) throw new ArgumentNullException("queueStorage");

          // "N" format: 32 hexadecimal digits, no dashes or braces.
          _jobName = Guid.NewGuid().ToString("N");
          _blobStorage = blobStorage;
          _queueStorage = queueStorage;
      }

      /// <summary>Initializes a new instance of the
      /// <see cref="MapReduceJob{TMapIn,TMapOut}"/> generic class attached to an existing job.</summary>
      /// <param name="jobId">The ID of the job as previously returned by <see cref="PushItems"/>.</param>
      /// <param name="blobStorage">The blob storage provider.</param>
      /// <param name="queueStorage">The queue storage provider.</param>
      /// <exception cref="ArgumentNullException">If any argument is <c>null</c>.</exception>
      /// <remarks>An instance created this way is considered to have its items already pushed,
      /// so calling <see cref="PushItems"/> on it throws <see cref="InvalidOperationException"/>.</remarks>
      public MapReduceJob(string jobId, IBlobStorageProvider blobStorage, IQueueStorageProvider queueStorage)
      {
          if(null == jobId) throw new ArgumentNullException("jobId");
          if(null == blobStorage) throw new ArgumentNullException("blobStorage");
          if(null == queueStorage) throw new ArgumentNullException("queueStorage");

          _jobName = jobId;
          _itemsPushed = true;
          _blobStorage = blobStorage;
          _queueStorage = queueStorage;
      }

      /// <summary>Pushes a batch of items for processing.</summary>
      /// <param name="functions">The functions for map/reduce/aggregate operations.</param>
      /// <param name="items">The items to process (at least two).</param>
      /// <param name="workerCount">The max number of workers to use.</param>
      /// <returns>The batch ID.</returns>
      /// <exception cref="ArgumentNullException">If <paramref name="functions"/> or
      /// <paramref name="items"/> is <c>null</c>.</exception>
      /// <exception cref="InvalidOperationException">If the method was already called, or if the
      /// instance was created from an existing job ID.</exception>
      /// <exception cref="ArgumentException">If <paramref name="items"/> contains less than two items.
      /// This method does not check the count itself; the check is presumably performed by
      /// <see cref="MapReduceBlobSet"/> (to be confirmed against that class).</exception>
      public string PushItems(IMapReduceFunctions functions, IList<TMapIn> items, int workerCount)
      {
          if(null == functions) throw new ArgumentNullException("functions");
          if(null == items) throw new ArgumentNullException("items");

          lock(_syncRoot)
          {
              if(_itemsPushed) throw new InvalidOperationException("A batch was already pushed to the work queue");

              var blobSet = new MapReduceBlobSet(_blobStorage, _queueStorage);

              // The blob set receives the items as plain objects; the element types are
              // passed alongside as typeof(TMapIn) / typeof(TMapOut).
              var untypedItems = items.Cast<object>().ToList();
              blobSet.GenerateBlobSets(_jobName, untypedItems, functions, workerCount, typeof(TMapIn), typeof(TMapOut));

              // Only flagged after the generation call returned without throwing,
              // so a failed push can be attempted again.
              _itemsPushed = true;
              return _jobName;
          }
      }

      /// <summary>Indicates whether the job is completed.</summary>
      /// <returns><c>true</c> if the batch is completed, <c>false</c> otherwise.</returns>
      public bool IsCompleted()
      {
          lock(_syncRoot)
          {
              var blobSet = new MapReduceBlobSet(_blobStorage, _queueStorage);

              // NOTE(review): Item1 / Item2 are presumably the completed and total blob set
              // counts - confirm against MapReduceBlobSet.GetCompletedBlobSets.
              var status = blobSet.GetCompletedBlobSets(_jobName);
              if(status.Item1 < status.Item2) return false;

              try
              {
                  // Used purely as a probe: the returned value is discarded, and an
                  // InvalidOperationException is taken to mean "not completed yet".
                  blobSet.GetAggregatedResult<object>(_jobName);
                  return true;
              }
              catch(InvalidOperationException)
              {
                  return false;
              }
          }
      }

      /// <summary>Gets the result of a job.</summary>
      /// <returns>The result item.</returns>
      /// <exception cref="InvalidOperationException">If the result is not ready
      /// (see <see cref="IsCompleted"/>).</exception>
      public TMapOut GetResult()
      {
          lock(_syncRoot)
          {
              var blobSet = new MapReduceBlobSet(_blobStorage, _queueStorage);
              return blobSet.GetAggregatedResult<TMapOut>(_jobName);
          }
      }

      /// <summary>Deletes all the data related to the job.</summary>
      /// <remarks>After calling this method, the instance of
      /// <see cref="MapReduceJob{TMapIn,TMapOut}"/> should not be used anymore.</remarks>
      public void DeleteJobData()
      {
          lock(_syncRoot)
          {
              var blobSet = new MapReduceBlobSet(_blobStorage, _queueStorage);
              blobSet.DeleteJobData(_jobName);
          }
      }
  }
  112. }