PageRenderTime 59ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 0ms

/ToMigrate/Raven.Database/Indexing/SimpleIndex.cs

http://github.com/ayende/ravendb
C# | 480 lines | 397 code | 71 blank | 12 comment | 38 complexity | 6b9ce420f7e305b8813339c5ea99c572 MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="SimpleIndex.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections.Concurrent;
  8. using System.Collections.Generic;
  9. using System.ComponentModel;
  10. using System.Diagnostics;
  11. using System.Linq;
  12. using System.Threading;
  13. using Lucene.Net.Documents;
  14. using Lucene.Net.Index;
  15. using Lucene.Net.Store;
  16. using Raven.Abstractions;
  17. using Raven.Abstractions.Data;
  18. using Raven.Abstractions.Exceptions;
  19. using Raven.Abstractions.Indexing;
  20. using Raven.Abstractions.Linq;
  21. using Raven.Abstractions.Logging;
  22. using Raven.Database.Extensions;
  23. using Raven.Database.Linq;
  24. using Raven.Database.Storage;
  25. using Raven.Database.Util;
  26. using Spatial4n.Core.Exceptions;
  27. namespace Raven.Database.Indexing
  28. {
  29. internal class SimpleIndex : Index
  30. {
  /// <summary>
  /// Creates the index by forwarding every argument unchanged to the <see cref="Index"/> base constructor.
  /// </summary>
  public SimpleIndex(
      Directory directory,
      int id,
      IndexDefinition indexDefinition,
      AbstractViewGenerator viewGenerator,
      WorkContext context)
      : base(directory, id, indexDefinition, viewGenerator, context)
  {
      // nothing to initialize beyond what the base class does
  }
  /// <summary>
  /// Always <c>false</c> for this index type.
  /// </summary>
  public override bool IsMapReduce
  {
      get
      {
          return false;
      }
  }
  /// <summary>
  /// Time at which <see cref="HandleCommitPoints"/> last stored a commit point.
  /// Keeps the <see cref="DateTime"/> default value until the first commit point is stored.
  /// </summary>
  public DateTime LastCommitPointStoreTime { get; private set; }
  /// <summary>
  /// Runs the map definitions over a batch of documents and writes the resulting entries to the Lucene index.
  /// </summary>
  /// <param name="viewGenerator">Supplies the map definitions executed for each partition of documents.</param>
  /// <param name="batch">Documents to index, the per-document skip-delete flags and the highest etag of the batch.</param>
  /// <param name="actions">Storage accessor; used to time the storage commit and to update document references.</param>
  /// <param name="minimumTimestamp">Not used by this implementation.</param>
  /// <param name="token">Checked on entry, for every source document, for every partition and for every map output.</param>
  /// <returns>The performance record returned by <c>RecordCurrentBatch</c>, with its completion callback attached.</returns>
  /// <exception cref="ArgumentException">A document in the batch has no <c>__document_id</c>.</exception>
  /// <exception cref="OperationCanceledException">Cancellation was requested through <paramref name="token"/>.</exception>
  public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
  {
      token.ThrowIfCancellationRequested();

      var count = 0;        // number of index entries written
      var sourceCount = 0;  // number of source documents seen in the batch
      var writeToIndexStats = new List<PerformanceStats>();

      IndexingPerformanceStats performance = null;
      var performanceStats = new List<BasePerformanceStats>();

      // Time the storage commit through the accessor's before/after commit events.
      var storageCommitDuration = new Stopwatch();
      actions.BeforeStorageCommit += storageCommitDuration.Start;
      actions.AfterStorageCommit += () =>
      {
          storageCommitDuration.Stop();
          performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
      };

      Write((indexWriter, analyzer, stats) =>
      {
          var processedKeys = new HashSet<string>();
          var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
              .Where(x => x != null)
              .ToList();

          try
          {
              performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);

              var deleteExistingDocumentsDuration = new Stopwatch();
              var docIdTerm = new Term(Constants.DocumentIdFieldName);

              // First pass (sequential): notify triggers and delete the existing entries of every
              // distinct document id, then drop filtered documents so that they are not mapped.
              var documentsWrapped = batch.Docs.Select((doc, i) =>
              {
                  token.ThrowIfCancellationRequested();

                  // Counted once per source document so that ChangedDocs and the batch
                  // statistics reflect the size of the batch.
                  Interlocked.Increment(ref sourceCount);

                  if (doc.__document_id == null)
                      throw new ArgumentException(
                          string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                  string documentId = doc.__document_id.ToString();

                  // A document id that was already handled in this batch is not deleted again.
                  if (processedKeys.Add(documentId) == false)
                      return doc;

                  InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                  if (batch.SkipDeleteFromIndex[i] == false ||
                      context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                  {
                      using (StopwatchScope.For(deleteExistingDocumentsDuration))
                      {
                          indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
                      }
                  }

                  return doc;
              })
              .Where(x => x is FilteredDocument == false)
              .ToList();

              performanceStats.Add(new PerformanceStats
              {
                  Name = IndexingOperation.Lucene_DeleteExistingDocument,
                  DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
              });

              // Results gathered from the partitions processed by the mapping thread pool.
              var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
              var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
              var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();

              var parallelProcessingStart = SystemTime.UtcNow;

              context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator<dynamic> partition) =>
              {
                  token.ThrowIfCancellationRequested();

                  var parallelStats = new ParallelBatchStats
                  {
                      StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
                  };

                  var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);

                  // One Lucene document and one id field are reused for every output of this partition.
                  var luceneDoc = new Document();
                  var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                      Field.Index.NOT_ANALYZED_NO_NORMS);

                  using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                  {
                      string currentDocId = null;
                      int outputPerDocId = 0;
                      Action<Exception, object> onErrorFunc;
                      bool skipDocument = false;

                      var linqExecutionDuration = new Stopwatch();
                      var addDocumentDuration = new Stopwatch();
                      var convertToLuceneDocumentDuration = new Stopwatch();

                      foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
                      {
                          token.ThrowIfCancellationRequested();

                          float boost;
                          IndexingResult indexingResult;
                          using (StopwatchScope.For(convertToLuceneDocumentDuration))
                          {
                              try
                              {
                                  indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                              }
                              catch (Exception e)
                              {
                                  // A conversion failure is reported and only this output is skipped.
                                  onErrorFunc(e, doc);
                                  continue;
                              }
                          }

                          // ReSharper disable once RedundantBoolCompare --> code clarity
                          if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                          {
                              continue;
                          }

                          // A new document id resets the per-document output counter and skip flag.
                          if (currentDocId != indexingResult.NewDocId)
                          {
                              currentDocId = indexingResult.NewDocId;
                              outputPerDocId = 0;
                              skipDocument = false;
                          }

                          if (skipDocument)
                              continue;

                          outputPerDocId++;
                          if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
                          {
                              // Remaining outputs of this document are ignored.
                              skipDocument = true;
                              continue;
                          }

                          Interlocked.Increment(ref count);

                          using (StopwatchScope.For(convertToLuceneDocumentDuration))
                          {
                              luceneDoc.GetFields().Clear();
                              luceneDoc.Boost = boost;
                              documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                              luceneDoc.Add(documentIdField);
                              foreach (var field in indexingResult.Fields)
                              {
                                  luceneDoc.Add(field);
                              }
                          }

                          batchers.ApplyAndIgnoreAllErrors(
                              exception =>
                              {
                                  logIndexing.WarnException(
                                      string.Format(
                                          "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                          PublicName, indexingResult.NewDocId),
                                      exception);
                                  context.AddError(
                                      indexId,
                                      PublicName,
                                      indexingResult.NewDocId,
                                      exception,
                                      "OnIndexEntryCreated Trigger");
                              },
                              trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                          LogIndexedDocument(indexingResult.NewDocId, luceneDoc);

                          using (StopwatchScope.For(addDocumentDuration))
                          {
                              AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
                          }

                          Interlocked.Increment(ref stats.IndexingSuccesses);
                      }

                      // Hand this partition's references and timings over to the shared queues.
                      allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                      allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);

                      parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
                      parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
                      parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
                      parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));

                      parallelOperations.Enqueue(parallelStats);
                  }
              }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));

              performanceStats.Add(new ParallelPerformanceStats
              {
                  NumberOfThreads = parallelOperations.Count,
                  DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
                  BatchedOperations = parallelOperations.ToList()
              });

              var updateDocumentReferencesDuration = new Stopwatch();
              using (StopwatchScope.For(updateDocumentReferencesDuration))
              {
                  UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
              }
              performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
          }
          catch (Exception e)
          {
              // Let every trigger batcher know about the failure, then rethrow it unchanged.
              batchers.ApplyAndIgnoreAllErrors(
                  ex =>
                  {
                      logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
                      context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
                  },
                  x => x.AnErrorOccured(e));
              throw;
          }
          finally
          {
              // Batchers are disposed on both the success and the failure path.
              batchers.ApplyAndIgnoreAllErrors(
                  e =>
                  {
                      logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                      context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                  },
                  x => x.Dispose());
          }

          return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
          {
              ChangedDocs = sourceCount
          };
      }, writeToIndexStats);

      performanceStats.AddRange(writeToIndexStats);

      InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);

      if (logIndexing.IsDebugEnabled)
          logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);

      return performance;
  }
  /// <summary>
  /// Attaches a completion callback to <paramref name="performance"/> that reports the batch through <c>BatchCompleted</c>.
  /// </summary>
  /// <param name="performance">Performance record that receives the callback.</param>
  /// <param name="sourceCount">Source-document count passed on to <c>BatchCompleted</c>.</param>
  /// <param name="count">Output count passed on to <c>BatchCompleted</c>.</param>
  /// <param name="performanceStats">Collected operation timings passed on to <c>BatchCompleted</c>.</param>
  private void InitializeIndexingPerformanceCompleteDelegate(
      IndexingPerformanceStats performance,
      int sourceCount,
      int count,
      List<BasePerformanceStats> performanceStats)
  {
      performance.OnCompleted =
          () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);
  }
  /// <summary>
  /// Tells whether <paramref name="highestETag"/> equals the most recent document etag known to storage.
  /// </summary>
  /// <param name="highestETag">Etag to compare with the most recent document etag.</param>
  /// <returns><c>true</c> when both etags are equal; otherwise <c>false</c>.</returns>
  protected override bool IsUpToDateEnoughToWriteToDisk(Etag highestETag)
  {
      var isCurrent = false;
      var storage = context.Database.TransactionalStorage;

      // The comparison runs inside a storage batch; its result is carried out through the captured local.
      storage.Batch(accessor =>
      {
          var mostRecentEtag = accessor.Staleness.GetMostRecentDocumentEtag();
          isCurrent = mostRecentEtag == highestETag;
      });

      return isCurrent;
  }
  /// <summary>
  /// Stores a new commit point for the index or, when none is stored, hands the deleted keys to the index storage.
  /// </summary>
  /// <param name="itemsInfo">Result of the write: highest etag, deleted keys and the commit-point switch.</param>
  /// <param name="segmentsInfo">Segments information to store; when <c>null</c> the current segments info is read from the directory.</param>
  protected override void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo)
  {
      // Routine trace message, so it is written at Debug level behind the usual guard
      // rather than being reported as an error on every call.
      if (logIndexing.IsDebugEnabled)
          logIndexing.Debug("HandlingCommitPoint for index {0} in DB {1}", this.PublicName, this.context.DatabaseName);

      if (ShouldStoreCommitPoint(itemsInfo) && itemsInfo.HighestETag != null)
      {
          context.IndexStorage.StoreCommitPoint(indexId.ToString(), new IndexCommitPoint
          {
              HighestCommitedETag = itemsInfo.HighestETag,
              TimeStamp = LastIndexTime,
              SegmentsInfo = segmentsInfo ?? IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory)
          });

          LastCommitPointStoreTime = SystemTime.UtcNow;
      }
      else if (itemsInfo.DeletedKeys != null && directory is RAMDirectory == false)
      {
          // No commit point was stored: pass the deleted keys on (never for a RAM directory).
          context.IndexStorage.AddDeletedKeysToCommitPoints(indexDefinition, itemsInfo.DeletedKeys);
      }
  }
  /// <summary>
  /// Decides whether a commit point should be stored for the write described by <paramref name="itemsInfo"/>.
  /// </summary>
  /// <returns>
  /// <c>false</c> when commit points are disabled for this write or the directory is a <see cref="RAMDirectory"/>;
  /// otherwise <c>true</c> when either configured time interval has been exceeded.
  /// </returns>
  private bool ShouldStoreCommitPoint(IndexedItemsInfo itemsInfo)
  {
      // disabled for this write, or a RAM index for which storing commits has no point
      if (itemsInfo.DisableCommitPoint || directory is RAMDirectory)
          return false;

      var indexingConfiguration = context.Configuration.Indexing;

      // the time between the previous and the last indexing run exceeds the configured minimum interval
      if (LastIndexTime - PreviousIndexTime > indexingConfiguration.MinIndexingIntervalToStoreCommitPoint.AsTimeSpan)
          return true;

      // otherwise store at least once per configured maximum interval
      return LastIndexTime - LastCommitPointStoreTime > indexingConfiguration.MaxIndexCommitPointStoreInterval.AsTimeSpan;
  }
  /// <summary>
  /// Converts one map output into an <see cref="IndexingResult"/>, unwrapping a <see cref="BoostedValue"/> first.
  /// </summary>
  /// <param name="doc">Map output; may be a <see cref="BoostedValue"/> wrapping the actual value.</param>
  /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
  /// <param name="boost">Receives the boost of the wrapper, or 1 when <paramref name="doc"/> is not boosted.</param>
  /// <returns>The extracted document id and fields.</returns>
  private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
  {
      var boosted = doc as BoostedValue;
      if (boosted == null)
      {
          boost = 1;
      }
      else
      {
          doc = boosted.Value;
          boost = boosted.Boost;
      }

      // Dynamic JSON objects and plain objects are handled by different extraction overloads.
      var dynamicDoc = doc as DynamicJsonObject;
      IndexingResult result = dynamicDoc != null
          ? ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, dynamicDoc)
          : ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);

      var boostDiffersFromOne = Math.Abs(boost - 1) > float.Epsilon;
      if (boostDiffersFromOne == false)
          return result;

      // A boost other than 1 switches norms back on for every field.
      foreach (var field in result.Fields)
      {
          field.OmitNorms = false;
      }

      return result;
  }
  /// <summary>
  /// Outcome of converting a single map output into Lucene fields.
  /// </summary>
  private class IndexingResult
  {
      /// <summary>Id of the document the entry belongs to; <c>null</c> when no id was found.</summary>
      public string NewDocId;

      /// <summary>When <c>true</c>, the indexing loop skips this result.</summary>
      public bool ShouldSkip;

      /// <summary>Lucene fields produced for the entry.</summary>
      public List<AbstractField> Fields;
  }
  /// <summary>
  /// Extracts the document id and the Lucene fields from a dynamic JSON map output.
  /// </summary>
  /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
  /// <param name="dynamicJsonObject">Map output; its id is taken from the root parent (or the object itself).</param>
  /// <returns>A result that is never marked to be skipped.</returns>
  /// <exception cref="InvalidSpatialShapException">The converter reported an invalid spatial shape.</exception>
  private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
  {
      var rawDocId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

      // A dynamic null object stands for "no id".
      string docId;
      if (rawDocId is DynamicNullObject)
          docId = null;
      else
          docId = (string)rawDocId;

      var result = new IndexingResult
      {
          NewDocId = docId,
          ShouldSkip = false
      };

      try
      {
          var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;
          result.Fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
      }
      catch (InvalidShapeException shapeError)
      {
          // Rethrown together with the document id.
          throw new InvalidSpatialShapException(shapeError, docId);
      }

      return result;
  }
  336. private readonly ConcurrentDictionary<Type, PropertyAccessor> propertyAccessorCache = new ConcurrentDictionary<Type, PropertyAccessor>();
  /// <summary>
  /// Extracts the document id and the Lucene fields from a map output that is a plain object.
  /// </summary>
  /// <param name="anonymousObjectToLuceneDocumentConverter">Converter that produces the Lucene fields.</param>
  /// <param name="doc">Map output whose properties are read through a cached <see cref="PropertyAccessor"/>.</param>
  /// <returns>
  /// A result marked to be skipped when the object has more than one property yet no field was produced.
  /// </returns>
  /// <exception cref="InvalidSpatialShapException">The converter reported an invalid spatial shape.</exception>
  private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
  {
      PropertyAccessor accessor;
      var docId = GetDocumentId(doc, out accessor);

      List<AbstractField> fields;
      try
      {
          fields = anonymousObjectToLuceneDocumentConverter.Index(doc, accessor, Field.Store.NO).ToList();
      }
      catch (InvalidShapeException shapeError)
      {
          // Rethrown together with the document id.
          throw new InvalidSpatialShapException(shapeError, docId);
      }

      // we always have at least __document_id, so more than one property means there was something to index
      var hasPropertiesBesidesId = accessor.Properies.Count > 1;
      var producedNoFields = fields.Count == 0;

      return new IndexingResult
      {
          NewDocId = docId,
          Fields = fields,
          ShouldSkip = hasPropertiesBesidesId && producedNoFields
      };
  }
  /// <summary>
  /// Reads the document id property of <paramref name="doc"/> through a per-type cached accessor.
  /// </summary>
  /// <param name="doc">Object to read the id from.</param>
  /// <param name="accessor">Receives the accessor cached (or newly created) for the runtime type of <paramref name="doc"/>.</param>
  /// <returns>The id as a string, or <c>null</c> when the value read is not a string.</returns>
  private string GetDocumentId(object doc, out PropertyAccessor accessor)
  {
      accessor = propertyAccessorCache.GetOrAdd(doc.GetType(), PropertyAccessor.Create);

      var rawId = accessor.GetValue(Constants.DocumentIdFieldName, doc);
      return rawId as string;
  }
  /// <summary>
  /// Deletes the index entries of the given document keys and notifies the index update triggers.
  /// </summary>
  /// <param name="keys">Document keys to remove; they are lower-cased before being matched against the id field.</param>
  /// <param name="context">Work context that supplies the index update triggers and receives trigger errors.</param>
  public override void Remove(string[] keys, WorkContext context)
  {
      Write((writer, analyzer, stats) =>
      {
          stats.Operation = IndexingWorkStats.Status.Ignore;

          if (logIndexing.IsDebugEnabled)
              logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), PublicName));

          var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
              .Where(x => x != null)
              .ToList();

          try
          {
              keys.Apply(
                  key =>
                      InvokeOnIndexEntryDeletedOnAllBatchers(batchers, new Term(Constants.DocumentIdFieldName, key.ToLowerInvariant())));

              writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
          }
          finally
          {
              // Dispose the batchers even when a trigger notification or the delete itself throws.
              batchers.ApplyAndIgnoreAllErrors(
                  e =>
                  {
                      logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
                      context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
                  },
                  batcher => batcher.Dispose());
          }

          return new IndexedItemsInfo(GetLastEtagFromStats())
          {
              ChangedDocs = keys.Length,
              DeletedKeys = keys
          };
      });
  }
  /// <summary>
  /// For index recovery purposes: deletes the entries of the given keys without notifying
  /// any trigger and with commit points disabled for the write.
  /// </summary>
  /// <param name="keys">Document keys to remove; they are lower-cased before being matched against the id field.</param>
  /// <param name="lastEtag">Etag reported with the resulting <c>IndexedItemsInfo</c>.</param>
  internal void RemoveDirectlyFromIndex(string[] keys, Etag lastEtag)
  {
      Write((writer, analyzer, stats) =>
      {
          stats.Operation = IndexingWorkStats.Status.Ignore;

          var termsToDelete = new Term[keys.Length];
          for (var i = 0; i < keys.Length; i++)
          {
              termsToDelete[i] = new Term(Constants.DocumentIdFieldName, keys[i].ToLowerInvariant());
          }
          writer.DeleteDocuments(termsToDelete);

          // just commit: no commit point is created and no information about the deleted keys is added
          var itemsInfo = new IndexedItemsInfo(lastEtag);
          itemsInfo.ChangedDocs = keys.Length;
          itemsInfo.DisableCommitPoint = true;
          return itemsInfo;
      });
  }
  408. }
  409. }