
/Raven.Database/Indexing/ReducingExecuter.cs

https://github.com/nwendel/ravendb
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using Raven.Abstractions.Data;
using Raven.Abstractions.Extensions;
using Raven.Abstractions.Logging;
using Raven.Database.Json;
using Raven.Database.Linq;
using Raven.Database.Storage;
using Raven.Database.Tasks;
using Raven.Database.Util;

namespace Raven.Database.Indexing
{
    public class ReducingExecuter : AbstractIndexingExecuter
    {
        public ReducingExecuter(WorkContext context)
            : base(context)
        {
            autoTuner = new ReduceBatchSizeAutoTuner(context);
        }
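
        // Reduces a single index: storage reports, per reduce key, whether the pending work is
        // small enough for a single-step reduce or needs the multi-step bucket tree, both groups
        // are processed, and the consumed scheduled-reduction entries are then deleted so they
        // are not picked up again (unless the operation was canceled mid-way).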
        protected void HandleReduceForIndex(IndexToWorkOn indexToWorkOn)
        {
            var viewGenerator = context.IndexDefinitionStorage.GetViewGenerator(indexToWorkOn.IndexId);
            if (viewGenerator == null)
                return;

            bool operationCanceled = false;
            var itemsToDelete = new ConcurrentSet<object>();

            IList<ReduceTypePerKey> mappedResultsInfo = null;
            transactionalStorage.Batch(actions =>
            {
                mappedResultsInfo = actions.MapReduce.GetReduceTypesPerKeys(indexToWorkOn.IndexId,
                    context.CurrentNumberOfItemsToReduceInSingleBatch,
                    context.NumberOfItemsToExecuteReduceInSingleStep).ToList();
            });

            var singleStepReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).Select(x => x.ReduceKey).ToArray();
            var multiStepsReduceKeys = mappedResultsInfo.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).Select(x => x.ReduceKey).ToArray();

            currentlyProcessedIndexes.TryAdd(indexToWorkOn.IndexId, indexToWorkOn.Index);
            try
            {
                if (singleStepReduceKeys.Length > 0)
                {
                    Log.Debug("SingleStep reduce for keys: {0}", string.Join(",", singleStepReduceKeys));
                    SingleStepReduce(indexToWorkOn, singleStepReduceKeys, viewGenerator, itemsToDelete);
                }

                if (multiStepsReduceKeys.Length > 0)
                {
                    Log.Debug("MultiStep reduce for keys: {0}", string.Join(",", multiStepsReduceKeys));
                    MultiStepReduce(indexToWorkOn, multiStepsReduceKeys, viewGenerator, itemsToDelete);
                }
            }
            catch (OperationCanceledException)
            {
                operationCanceled = true;
            }
            finally
            {
                if (operationCanceled == false)
                {
                    // whether we succeeded in indexing or not, we have to update this,
                    // because otherwise we keep trying to re-index failed mapped results
                    transactionalStorage.Batch(actions =>
                    {
                        var latest = actions.MapReduce.DeleteScheduledReduction(itemsToDelete);
                        if (latest == null)
                            return;
                        actions.Indexing.UpdateLastReduced(indexToWorkOn.Index.indexId, latest.Etag, latest.Timestamp);
                    });
                }

                Index _;
                currentlyProcessedIndexes.TryRemove(indexToWorkOn.IndexId, out _);
            }
        }
        protected override void UpdateStalenessMetrics(int staleCount)
        {
            context.MetricsCounters.StaleIndexReduces.Update(staleCount);
        }
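
        // Multi-step reduce handles keys with too many mapped results to re-reduce in one pass.
        // Keys that were previously reduced in single step first get level 0 reductions scheduled
        // for all of their mapped buckets, then the work is processed through the three-level
        // reduce tree below, and finally the keys are marked as MultiStep for the next run.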
        private void MultiStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator, ConcurrentSet<object> itemsToDelete)
        {
            var needToMoveToMultiStep = new HashSet<string>();
            transactionalStorage.Batch(actions =>
            {
                foreach (var localReduceKey in keysToReduce)
                {
                    var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, localReduceKey);

                    if (lastPerformedReduceType != ReduceType.MultiStep)
                        needToMoveToMultiStep.Add(localReduceKey);

                    if (lastPerformedReduceType != ReduceType.SingleStep)
                        continue;

                    // we exceeded the limit of items to reduce in single step,
                    // now we need to schedule reductions at level 0 for all map results with the given reduce key
                    var mappedItems = actions.MapReduce.GetMappedBuckets(index.IndexId, localReduceKey).ToList();
                    foreach (var result in mappedItems.Select(x => new ReduceKeyAndBucket(x, localReduceKey)))
                    {
                        actions.MapReduce.ScheduleReductions(index.IndexId, 0, result);
                    }
                }
            });
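
            // Walk the three reduce levels in order. Each pass pulls the scheduled items for the
            // current level in throttled batches, removes the now-stale results one level up,
            // schedules follow-up reductions there with buckets grouped by /1024 (except at the
            // final level), and reduces the loaded batch.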
            for (int i = 0; i < 3; i++)
            {
                var level = i;

                var reduceParams = new GetItemsToReduceParams(
                    index.IndexId,
                    keysToReduce,
                    level,
                    true,
                    itemsToDelete);

                bool retry = true;
                while (retry && reduceParams.ReduceKeys.Count > 0)
                {
                    var reduceBatchAutoThrottlerId = Guid.NewGuid();
                    try
                    {
                        transactionalStorage.Batch(actions =>
                        {
                            context.CancellationToken.ThrowIfCancellationRequested();

                            var batchTimeWatcher = Stopwatch.StartNew();

                            reduceParams.Take = context.CurrentNumberOfItemsToReduceInSingleBatch;
                            var persistedResults = actions.MapReduce.GetItemsToReduce(reduceParams).ToList();
                            if (persistedResults.Count == 0)
                            {
                                retry = false;
                                return;
                            }

                            var count = persistedResults.Count;
                            var size = persistedResults.Sum(x => x.Size);
                            autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reduceBatchAutoThrottlerId, size);

                            if (Log.IsDebugEnabled)
                            {
                                if (persistedResults.Count > 0)
                                    Log.Debug(() => string.Format("Found {0} results for keys [{1}] for index {2} at level {3} in {4}",
                                        persistedResults.Count,
                                        string.Join(", ", persistedResults.Select(x => x.ReduceKey).Distinct()),
                                        index.IndexId, level, batchTimeWatcher.Elapsed));
                                else
                                    Log.Debug("No reduce keys found for {0}", index.IndexId);
                            }

                            context.CancellationToken.ThrowIfCancellationRequested();

                            var requiredReduceNextTime = persistedResults.Select(x => new ReduceKeyAndBucket(x.Bucket, x.ReduceKey))
                                .OrderBy(x => x.Bucket)
                                .Distinct()
                                .ToArray();
                            foreach (var mappedResultInfo in requiredReduceNextTime)
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexId, level + 1, mappedResultInfo.ReduceKey,
                                    mappedResultInfo.Bucket);
                            }

                            if (level != 2)
                            {
                                var reduceKeysAndBuckets = requiredReduceNextTime
                                    .Select(x => new ReduceKeyAndBucket(x.Bucket / 1024, x.ReduceKey))
                                    .Distinct()
                                    .ToArray();
                                foreach (var reduceKeysAndBucket in reduceKeysAndBuckets)
                                {
                                    actions.MapReduce.ScheduleReductions(index.IndexId, level + 1, reduceKeysAndBucket);
                                }
                            }

                            var results = persistedResults
                                .Where(x => x.Data != null)
                                .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                                .ToArray();
                            var reduceKeys = new HashSet<string>(persistedResults.Select(x => x.ReduceKey),
                                StringComparer.InvariantCultureIgnoreCase);
                            context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                            context.CancellationToken.ThrowIfCancellationRequested();

                            var reduceTimeWatcher = Stopwatch.StartNew();
                            context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, level, context, actions, reduceKeys, persistedResults.Count);

                            var batchDuration = batchTimeWatcher.Elapsed;
                            Log.Debug("Indexed {0} reduce keys in {1} with {2} results for index {3} in {4} on level {5}", reduceKeys.Count, batchDuration,
                                results.Length, index.IndexId, reduceTimeWatcher.Elapsed, level);

                            autoTuner.AutoThrottleBatchSize(count, size, batchDuration);
                        });
                    }
                    finally
                    {
                        long _;
                        autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reduceBatchAutoThrottlerId, out _);
                    }
                }
            }

            foreach (var reduceKey in needToMoveToMultiStep)
            {
                string localReduceKey = reduceKey;
                transactionalStorage.Batch(actions =>
                    actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey,
                        ReduceType.MultiStep));
            }
        }
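
        // Single-step reduce: for keys whose mapped-result count is under the
        // NumberOfItemsToExecuteReduceInSingleStep limit, the raw map output is loaded in
        // parallel batches, collapsed into a single bucket, and reduced directly at the final
        // level (2), bypassing the intermediate reduce tree.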
        private void SingleStepReduce(IndexToWorkOn index, string[] keysToReduce, AbstractViewGenerator viewGenerator,
            ConcurrentSet<object> itemsToDelete)
        {
            var needToMoveToSingleStepQueue = new ConcurrentQueue<HashSet<string>>();

            Log.Debug(() => string.Format("Executing single step reducing for {0} keys [{1}]", keysToReduce.Length, string.Join(", ", keysToReduce)));
            var batchTimeWatcher = Stopwatch.StartNew();

            var count = 0;
            var size = 0;
            var state = new ConcurrentQueue<Tuple<HashSet<string>, List<MappedResultInfo>>>();
            var reducingBatchThrottlerId = Guid.NewGuid();

            try
            {
                BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, keysToReduce, enumerator =>
                {
                    var localNeedToMoveToSingleStep = new HashSet<string>();
                    needToMoveToSingleStepQueue.Enqueue(localNeedToMoveToSingleStep);
                    var localKeys = new HashSet<string>();
                    while (enumerator.MoveNext())
                    {
                        localKeys.Add(enumerator.Current);
                    }

                    transactionalStorage.Batch(actions =>
                    {
                        var getItemsToReduceParams = new GetItemsToReduceParams(index: index.IndexId, reduceKeys: localKeys, level: 0,
                            loadData: false,
                            itemsToDelete: itemsToDelete)
                        {
                            Take = int.MaxValue // just get all, we do the rate limit when we load the number of keys to reduce, anyway
                        };
                        var scheduledItems = actions.MapReduce.GetItemsToReduce(getItemsToReduceParams).ToList();

                        autoTuner.CurrentlyUsedBatchSizesInBytes.GetOrAdd(reducingBatchThrottlerId, scheduledItems.Sum(x => x.Size));

                        if (scheduledItems.Count == 0)
                        {
                            if (Log.IsWarnEnabled)
                            {
                                Log.Warn("Found single reduce items ({0}) that didn't have any items to reduce. Deleting level 1 & level 2 items for those keys. (If you can reproduce this, please contact support@ravendb.net)",
                                    string.Join(", ", keysToReduce));
                            }

                            // Here we have an interesting issue. We have scheduled reductions, because GetReduceTypesPerKeys() returned them,
                            // and at the same time, we don't have any at level 0. That probably means that we have them at level 1 or 2.
                            // They shouldn't be here, and indeed, we remove them just a little down from here in this function.
                            // That said, they might have been smuggled in between versions, or something happened to cause them to be here.
                            // In order to avoid that, we forcibly delete those extra items from the scheduled reductions, and move on.
                            foreach (var reduceKey in keysToReduce)
                            {
                                actions.MapReduce.DeleteScheduledReduction(index.IndexId, 1, reduceKey);
                                actions.MapReduce.DeleteScheduledReduction(index.IndexId, 2, reduceKey);
                            }
                        }

                        foreach (var reduceKey in localKeys)
                        {
                            var lastPerformedReduceType = actions.MapReduce.GetLastPerformedReduceType(index.IndexId, reduceKey);

                            if (lastPerformedReduceType != ReduceType.SingleStep)
                                localNeedToMoveToSingleStep.Add(reduceKey);

                            if (lastPerformedReduceType != ReduceType.MultiStep)
                                continue;

                            Log.Debug("Key {0} was moved from multi step to single step reduce, removing existing reduce results records",
                                reduceKey);

                            // now we are in single step, but previously a multi step reduce was performed for the given key
                            var mappedBuckets = actions.MapReduce.GetMappedBuckets(index.IndexId, reduceKey).ToList();

                            // add scheduled items too, to be sure we will delete reduce results of already deleted documents
                            mappedBuckets.AddRange(scheduledItems.Select(x => x.Bucket));

                            foreach (var mappedBucket in mappedBuckets.Distinct())
                            {
                                actions.MapReduce.RemoveReduceResults(index.IndexId, 1, reduceKey, mappedBucket);
                                actions.MapReduce.RemoveReduceResults(index.IndexId, 2, reduceKey, mappedBucket / 1024);
                            }
                        }

                        var mappedResults = actions.MapReduce.GetMappedResults(
                            index.IndexId,
                            localKeys,
                            loadData: true
                        ).ToList();

                        Interlocked.Add(ref count, mappedResults.Count);
                        Interlocked.Add(ref size, mappedResults.Sum(x => x.Size));

                        mappedResults.ApplyIfNotNull(x => x.Bucket = 0);

                        state.Enqueue(Tuple.Create(localKeys, mappedResults));
                    });
                });
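
                // Merge what the parallel batches collected and reduce it in one level-2 pass;
                // every mapped result was forced into bucket 0 above, so all of the loaded
                // results for these keys are reduced together.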
                var reduceKeys = new HashSet<string>(state.SelectMany(x => x.Item1));

                var results = state.SelectMany(x => x.Item2)
                    .Where(x => x.Data != null)
                    .GroupBy(x => x.Bucket, x => JsonToExpando.Convert(x.Data))
                    .ToArray();
                context.MetricsCounters.ReducedPerSecond.Mark(results.Length);

                context.TransactionalStorage.Batch(actions =>
                    context.IndexStorage.Reduce(index.IndexId, viewGenerator, results, 2, context, actions, reduceKeys, state.Sum(x => x.Item2.Count))
                );

                autoTuner.AutoThrottleBatchSize(count, size, batchTimeWatcher.Elapsed);

                var needToMoveToSingleStep = new HashSet<string>();
                HashSet<string> set;
                while (needToMoveToSingleStepQueue.TryDequeue(out set))
                {
                    needToMoveToSingleStep.UnionWith(set);
                }

                foreach (var reduceKey in needToMoveToSingleStep)
                {
                    string localReduceKey = reduceKey;
                    transactionalStorage.Batch(actions =>
                        actions.MapReduce.UpdatePerformedReduceType(index.IndexId, localReduceKey, ReduceType.SingleStep));
                }
            }
            finally
            {
                long _;
                autoTuner.CurrentlyUsedBatchSizesInBytes.TryRemove(reducingBatchThrottlerId, out _);
            }
        }
        protected override bool IsIndexStale(IndexStats indexesStat, IStorageActionsAccessor actions, bool isIdle, Reference<bool> onlyFoundIdleWork)
        {
            onlyFoundIdleWork.Value = false;
            return actions.Staleness.IsReduceStale(indexesStat.Id);
        }

        protected override DatabaseTask GetApplicableTask(IStorageActionsAccessor actions)
        {
            return null;
        }

        protected override void FlushAllIndexes()
        {
            context.IndexStorage.FlushReduceIndexes();
        }

        protected override IndexToWorkOn GetIndexToWorkOn(IndexStats indexesStat)
        {
            return new IndexToWorkOn
            {
                IndexId = indexesStat.Id,
                LastIndexedEtag = Etag.Empty
            };
        }

        protected override void ExecuteIndexingWork(IList<IndexToWorkOn> indexesToWorkOn)
        {
            BackgroundTaskExecuter.Instance.ExecuteAllInterleaved(context, indexesToWorkOn,
                HandleReduceForIndex);
        }

        protected override bool IsValidIndex(IndexStats indexesStat)
        {
            var indexDefinition = context.IndexDefinitionStorage.GetIndexDefinition(indexesStat.Id);
            return indexDefinition != null && indexDefinition.IsMapReduce;
        }
    }
}