
/Raven.Database/Indexing/SimpleIndex.cs

https://github.com/kairogyn/ravendb
//-----------------------------------------------------------------------
// <copyright file="SimpleIndex.cs" company="Hibernating Rhinos LTD">
//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
// </copyright>
//-----------------------------------------------------------------------
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Raven.Abstractions;
using Raven.Abstractions.Data;
using Raven.Abstractions.Indexing;
using Raven.Abstractions.Linq;
using Raven.Abstractions.Logging;
using Raven.Database.Extensions;
using Raven.Database.Linq;
using Raven.Database.Storage;

namespace Raven.Database.Indexing
{
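	// A map-only index: documents are passed straight through the map definitions
	// and written to Lucene; there is no reduce phase (IsMapReduce returns false).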
	public class SimpleIndex : Index
	{
		public SimpleIndex(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
			: base(directory, name, indexDefinition, viewGenerator, context)
		{
		}

		public override bool IsMapReduce
		{
			get { return false; }
		}

		public DateTime LastCommitPointStoreTime { get; private set; }
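
		// Indexes a batch of documents: deletes stale index entries, runs the map
		// definitions over the batch in parallel partitions, and writes the resulting
		// Lucene documents, notifying index update triggers along the way.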
		public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
		{
			var count = 0;
			var sourceCount = 0;
			var sw = Stopwatch.StartNew();
			var start = SystemTime.UtcNow;
			Write((indexWriter, analyzer, stats) =>
			{
				var processedKeys = new HashSet<string>();
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();
				try
				{
					RecordCurrentBatch("Current", batch.Docs.Count);
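
					// Wrap the batch in a lazy Select that runs once per source document:
					// it de-duplicates keys within the batch, fires OnIndexEntryDeleted
					// triggers, and deletes the old index entry before re-indexing.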
					var docIdTerm = new Term(Constants.DocumentIdFieldName);
					var documentsWrapped = batch.Docs.Select((doc, i) =>
					{
						Interlocked.Increment(ref sourceCount);
						if (doc.__document_id == null)
							throw new ArgumentException(
								string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

						string documentId = doc.__document_id.ToString();
						if (processedKeys.Add(documentId) == false)
							return doc;

						batchers.ApplyAndIgnoreAllErrors(
							exception =>
							{
								logIndexing.WarnException(
									string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
										name, documentId),
									exception);
								context.AddError(name,
									documentId,
									exception.Message,
									"OnIndexEntryDeleted Trigger");
							},
							trigger => trigger.OnIndexEntryDeleted(documentId));

						if (batch.SkipDeleteFromIndex[i] == false ||
							context.ShouldRemoveFromIndex(documentId)) // maybe it was recently deleted?
							indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

						return doc;
					})
						.Where(x => x is FilteredDocument == false)
						.ToList();
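
					// Collect the LoadDocument reference maps produced by each partition
					// so the document -> referenced-document links can be persisted below.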
					var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
					var missingReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();

					BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, partition =>
					{
						var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator);
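						// A single Lucene Document and id Field are reused for every map
						// result in this partition to avoid per-document allocations; the
						// "dummy" value is overwritten before each write.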
						var luceneDoc = new Document();
						var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
							Field.Index.NOT_ANALYZED_NO_NORMS);
						using (CurrentIndexingScope.Current = new CurrentIndexingScope(LoadDocument, (references, missing) =>
						{
							allReferencedDocs.Enqueue(references);
							missingReferencedDocs.Enqueue(missing);
						}))
						{
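							// RobustEnumerationIndex runs the map definitions over the
							// partition; per its robust-enumeration naming, a failing
							// document is recorded in stats rather than aborting the batch.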
							foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats))
							{
								float boost;
								var indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);

								if (indexingResult.NewDocId != null && indexingResult.ShouldSkip == false)
								{
									Interlocked.Increment(ref count);
									luceneDoc.GetFields().Clear();
									luceneDoc.Boost = boost;
									documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
									luceneDoc.Add(documentIdField);
									foreach (var field in indexingResult.Fields)
									{
										luceneDoc.Add(field);
									}
									batchers.ApplyAndIgnoreAllErrors(
										exception =>
										{
											logIndexing.WarnException(
												string.Format("Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
													name, indexingResult.NewDocId),
												exception);
											context.AddError(name,
												indexingResult.NewDocId,
												exception.Message,
												"OnIndexEntryCreated Trigger");
										},
										trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
									LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
									AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
								}
								Interlocked.Increment(ref stats.IndexingSuccesses);
							}
						}
					});
					UpdateDocumentReferences(actions, allReferencedDocs, missingReferencedDocs);
				}
				catch (Exception e)
				{
					batchers.ApplyAndIgnoreAllErrors(
						ex =>
						{
							logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
							context.AddError(name, null, ex.Message, "AnErrorOccured Trigger");
						},
						x => x.AnErrorOccured(e));
					throw;
				}
				finally
				{
					batchers.ApplyAndIgnoreAllErrors(
						e =>
						{
							logIndexing.WarnException("Failed to dispose on index update trigger", e);
							context.AddError(name, null, e.Message, "Dispose Trigger");
						},
						x => x.Dispose());
					BatchCompleted("Current");
				}
				return new IndexedItemsInfo
				{
					ChangedDocs = sourceCount,
					HighestETag = batch.HighestEtagInBatch
				};
			});

			AddindexingPerformanceStat(new IndexingPerformanceStats
			{
				OutputCount = count,
				ItemsCount = sourceCount,
				InputCount = batch.Docs.Count,
				Duration = sw.Elapsed,
				Operation = "Index",
				Started = start
			});

			logIndexing.Debug("Indexed {0} documents for {1}", count, name);
		}
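
		// The index is considered up to date enough to flush to disk only once it has
		// caught up with the most recent document etag in the database.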
		protected override bool IsUpToDateEnoughToWriteToDisk(Etag highestETag)
		{
			bool upToDate = false;
			context.Database.TransactionalStorage.Batch(accessor =>
			{
				upToDate = accessor.Staleness.GetMostRecentDocumentEtag() == highestETag;
			});
			return upToDate;
		}
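
		// Persists a commit point (the highest committed etag plus the current Lucene
		// segments info) so the index can be recovered without a full rebuild; batches
		// that only deleted documents append their keys to existing commit points.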
		protected override void HandleCommitPoints(IndexedItemsInfo itemsInfo)
		{
			if (ShouldStoreCommitPoint() && itemsInfo.HighestETag != null)
			{
				context.IndexStorage.StoreCommitPoint(name, new IndexCommitPoint
				{
					HighestCommitedETag = itemsInfo.HighestETag,
					TimeStamp = LastIndexTime,
					SegmentsInfo = GetCurrentSegmentsInfo()
				});
				LastCommitPointStoreTime = SystemTime.UtcNow;
			}
			else if (itemsInfo.DeletedKeys != null && directory is RAMDirectory == false)
			{
				context.IndexStorage.AddDeletedKeysToCommitPoints(name, itemsInfo.DeletedKeys);
			}
		}
		private IndexSegmentsInfo GetCurrentSegmentsInfo()
		{
			var segmentInfos = new SegmentInfos();
			var result = new IndexSegmentsInfo();

			try
			{
				segmentInfos.Read(directory);

				result.Generation = segmentInfos.Generation;
				result.SegmentsFileName = segmentInfos.GetCurrentSegmentFileName();
				result.ReferencedFiles = segmentInfos.Files(directory, false);
			}
			catch (CorruptIndexException ex)
			{
				logIndexing.WarnException(string.Format("Could not read segment information for an index '{0}'", name), ex);
				result.IsIndexCorrupted = true;
			}

			return result;
		}
		private bool ShouldStoreCommitPoint()
		{
			if (directory is RAMDirectory) // no point in trying to store commits for a RAM index
				return false;

			// no more often than the specified indexing time interval
			return (LastIndexTime - PreviousIndexTime > context.Configuration.MinIndexingTimeIntervalToStoreCommitPoint ||
					// but at least once per the specified time interval
					LastIndexTime - LastCommitPointStoreTime > context.Configuration.MaxIndexCommitPointStoreTimeInterval);
		}
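
		// Unwraps BoostedValue results produced by boosting in a map definition; when
		// a non-default boost is used, norms must be kept on the fields, since Lucene
		// folds boost factors into the norms and OmitNorms would discard them.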
		private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
		{
			boost = 1;

			var boostedValue = doc as BoostedValue;
			if (boostedValue != null)
			{
				doc = boostedValue.Value;
				boost = boostedValue.Boost;
			}

			IndexingResult indexingResult;
			if (doc is DynamicJsonObject)
				indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, (DynamicJsonObject)doc);
			else
				indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);

			if (Math.Abs(boost - 1) > float.Epsilon)
			{
				foreach (var abstractField in indexingResult.Fields)
				{
					abstractField.OmitNorms = false;
				}
			}

			return indexingResult;
		}
		private class IndexingResult
		{
			public string NewDocId;
			public List<AbstractField> Fields;
			public bool ShouldSkip;
		}

		private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
		{
			var newDocId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();
			return new IndexingResult
			{
				Fields = anonymousObjectToLuceneDocumentConverter.Index(((IDynamicJsonObject)dynamicJsonObject).Inner, Field.Store.NO).ToList(),
				NewDocId = newDocId is DynamicNullObject ? null : (string)newDocId,
				ShouldSkip = false
			};
		}
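
		// TypeDescriptor.GetProperties is comparatively expensive, so the property
		// descriptors for each CLR type are cached across indexing batches.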
		private readonly ConcurrentDictionary<Type, PropertyDescriptorCollection> propertyDescriptorCache = new ConcurrentDictionary<Type, PropertyDescriptorCollection>();

		private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
		{
			Type type = doc.GetType();
			PropertyDescriptorCollection properties =
				propertyDescriptorCache.GetOrAdd(type, TypeDescriptor.GetProperties);

			var abstractFields = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.NO).ToList();
			return new IndexingResult
			{
				Fields = abstractFields,
				NewDocId = properties.Find(Constants.DocumentIdFieldName, false).GetValue(doc) as string,
				ShouldSkip = properties.Count > 1 // we always have at least __document_id
					&& abstractFields.Count == 0
			};
		}
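
		// Removes the given document keys from the index, firing OnIndexEntryDeleted
		// triggers for each key; stats.Operation is set to Ignore so the deletions
		// are not counted as regular indexing work.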
		public override void Remove(string[] keys, WorkContext context)
		{
			Write((writer, analyzer, stats) =>
			{
				stats.Operation = IndexingWorkStats.Status.Ignore;
				logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), name));
				var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(name))
					.Where(x => x != null)
					.ToList();

				keys.Apply(
					key => batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format("Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									name, key),
								exception);
							context.AddError(name, key, exception.Message, "OnIndexEntryDeleted Trigger");
						},
						trigger => trigger.OnIndexEntryDeleted(key)));

				writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());

				batchers.ApplyAndIgnoreAllErrors(
					e =>
					{
						logIndexing.WarnException("Failed to dispose on index update trigger", e);
						context.AddError(name, null, e.Message, "Dispose Trigger");
					},
					batcher => batcher.Dispose());

				IndexStats currentIndexStats = null;
				context.TransactionalStorage.Batch(accessor => currentIndexStats = accessor.Indexing.GetIndexStats(name));

				return new IndexedItemsInfo
				{
					ChangedDocs = keys.Length,
					HighestETag = currentIndexStats.LastIndexedEtag,
					DeletedKeys = keys
				};
			});
		}
		/// <summary>
		/// For index recovery purposes
		/// </summary>
		internal void RemoveDirectlyFromIndex(string[] keys)
		{
			Write((writer, analyzer, stats) =>
			{
				stats.Operation = IndexingWorkStats.Status.Ignore;

				writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());

				return new IndexedItemsInfo // just commit; don't create a commit point or add any info about deleted keys
				{
					ChangedDocs = keys.Length
				};
			});
		}
	}
}