PageRenderTime 55ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Indexing/SimpleIndex.cs

https://github.com/nwendel/ravendb
C# | 395 lines | 340 code | 43 blank | 12 comment | 33 complexity | d85e9fdeafb414b64249f49bba2868c0 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="SimpleIndex.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections.Concurrent;
  8. using System.Collections.Generic;
  9. using System.ComponentModel;
  10. using System.Diagnostics;
  11. using System.Linq;
  12. using System.Threading;
  13. using Lucene.Net.Documents;
  14. using Lucene.Net.Index;
  15. using Lucene.Net.Search;
  16. using Lucene.Net.Store;
  17. using Raven.Abstractions;
  18. using Raven.Abstractions.Data;
  19. using Raven.Abstractions.Exceptions;
  20. using Raven.Abstractions.Indexing;
  21. using Raven.Abstractions.Linq;
  22. using Raven.Abstractions.Logging;
  23. using Raven.Database.Extensions;
  24. using Raven.Database.Linq;
  25. using Raven.Database.Storage;
  26. using Spatial4n.Core.Exceptions;
  27. namespace Raven.Database.Indexing
  28. {
  29. public class SimpleIndex : Index
  30. {
  /// <summary>
  /// Creates a simple index. Every argument is forwarded unchanged to the
  /// <see cref="Index"/> base constructor; no additional state is initialized here.
  /// </summary>
  public SimpleIndex(
      Directory directory,
      int id,
      IndexDefinition indexDefinition,
      AbstractViewGenerator viewGenerator,
      WorkContext context)
      : base(directory, id, indexDefinition, viewGenerator, context)
  {
  }
  /// <summary>
  /// Always <c>false</c> for this index type.
  /// </summary>
  public override bool IsMapReduce
  {
      get
      {
          return false;
      }
  }
  /// <summary>
  /// Time at which a commit point was last stored for this index. It is set to
  /// <c>SystemTime.UtcNow</c> by <c>HandleCommitPoints</c> whenever a commit point is written,
  /// and is compared against <c>LastIndexTime</c> in <c>ShouldStoreCommitPoint</c>.
  /// </summary>
  public DateTime LastCommitPointStoreTime
  {
      get;
      private set;
  }
  /// <summary>
  /// Runs the map definitions of <paramref name="viewGenerator"/> over the documents in
  /// <paramref name="batch"/> and writes the resulting entries to the Lucene index.
  /// </summary>
  /// <remarks>
  /// For every distinct document id in the batch the index update trigger batchers are notified
  /// of the deletion, and the existing index entries are deleted unless the batch flags the
  /// delete as skippable and the context does not request removal. Map outputs that cannot be
  /// converted are reported through the error callback supplied by
  /// <c>RobustEnumerationIndex</c> and skipped, so one bad output does not abort the batch.
  /// After the write completes, an indexing performance stat is recorded and a debug message is logged.
  /// </remarks>
  /// <param name="viewGenerator">Supplies the map definitions and is passed to the Lucene document converter.</param>
  /// <param name="batch">Documents to index, the per-document skip-delete flags and the highest etag before filtering.</param>
  /// <param name="actions">Storage accessor handed to <c>UpdateDocumentReferences</c>.</param>
  /// <param name="minimumTimestamp">Not read by this implementation.</param>
  public override void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp)
  {
      var count = 0;
      var sourceCount = 0;
      var sw = Stopwatch.StartNew();
      var start = SystemTime.UtcNow;
      Write((indexWriter, analyzer, stats) =>
      {
          var processedKeys = new HashSet<string>();
          var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
              .Where(x => x != null)
              .ToList();
          try
          {
              RecordCurrentBatch("Current", batch.Docs.Count);
              var docIdTerm = new Term(Constants.DocumentIdFieldName);
              var documentsWrapped = batch.Docs.Select((doc, i) =>
              {
                  Interlocked.Increment(ref sourceCount);
                  if (doc.__document_id == null)
                      throw new ArgumentException(
                          string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));

                  string documentId = doc.__document_id.ToString();

                  // Only the first occurrence of a document id triggers the delete handling below.
                  if (processedKeys.Add(documentId) == false)
                      return doc;

                  InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                  if (batch.SkipDeleteFromIndex[i] == false ||
                      context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
                      indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));

                  return doc;
              })
              .Where(x => x is FilteredDocument == false)
              .ToList();

              var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
              var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();

              BackgroundTaskExecuter.Instance.ExecuteAllBuffered(context, documentsWrapped, (partition) =>
              {
                  var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);

                  // A single Document and id Field instance are reused for every entry of this partition.
                  var luceneDoc = new Document();
                  var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
                      Field.Index.NOT_ANALYZED_NO_NORMS);

                  using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
                  {
                      string currentDocId = null;
                      int outputPerDocId = 0;
                      Action<Exception, object> onErrorFunc;
                      foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc))
                      {
                          float boost;
                          IndexingResult indexingResult;
                          try
                          {
                              // Convert exactly once. Every failure, including an invalid spatial
                              // shape, is reported through onErrorFunc and the output is skipped.
                              indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
                          }
                          catch (Exception e)
                          {
                              onErrorFunc(e, doc);
                              continue;
                          }

                          // ReSharper disable once RedundantBoolCompare --> code clarity
                          if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
                          {
                              continue;
                          }

                          // Count consecutive outputs that share a document id.
                          if (currentDocId != indexingResult.NewDocId)
                          {
                              currentDocId = indexingResult.NewDocId;
                              outputPerDocId = 0;
                          }

                          outputPerDocId++;
                          EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId);

                          Interlocked.Increment(ref count);

                          luceneDoc.GetFields().Clear();
                          luceneDoc.Boost = boost;
                          documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
                          luceneDoc.Add(documentIdField);
                          foreach (var field in indexingResult.Fields)
                          {
                              luceneDoc.Add(field);
                          }

                          batchers.ApplyAndIgnoreAllErrors(
                              exception =>
                              {
                                  logIndexing.WarnException(
                                      string.Format(
                                          "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
                                          indexId, indexingResult.NewDocId),
                                      exception);
                                  context.AddError(indexId,
                                      indexingResult.NewDocId,
                                      exception.Message,
                                      "OnIndexEntryCreated Trigger"
                                      );
                              },
                              trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));

                          LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
                          AddDocumentToIndex(indexWriter, luceneDoc, analyzer);

                          Interlocked.Increment(ref stats.IndexingSuccesses);
                      }

                      allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
                      allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
                  }
              });

              UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
          }
          catch (Exception e)
          {
              // Let the trigger batchers know about the failure, then rethrow with the original stack trace.
              batchers.ApplyAndIgnoreAllErrors(
                  ex =>
                  {
                      logIndexing.WarnException("Failed to notify index update trigger batcher about an error", ex);
                      context.AddError(indexId, null, ex.Message, "AnErrorOccured Trigger");
                  },
                  x => x.AnErrorOccured(e));
              throw;
          }
          finally
          {
              batchers.ApplyAndIgnoreAllErrors(
                  e =>
                  {
                      logIndexing.WarnException("Failed to dispose on index update trigger", e);
                      context.AddError(indexId, null, e.Message, "Dispose Trigger");
                  },
                  x => x.Dispose());
              BatchCompleted("Current");
          }

          return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
          {
              ChangedDocs = sourceCount
          };
      });

      AddindexingPerformanceStat(new IndexingPerformanceStats
      {
          OutputCount = count,
          ItemsCount = sourceCount,
          InputCount = batch.Docs.Count,
          Duration = sw.Elapsed,
          Operation = "Index",
          Started = start
      });
      logIndexing.Debug("Indexed {0} documents for {1}", count, indexId);
  }
  /// <summary>
  /// Reports whether <paramref name="highestETag"/> equals the most recent document etag
  /// returned by the staleness storage actions, read inside a transactional storage batch.
  /// </summary>
  /// <param name="highestETag">The etag to compare against the most recent document etag.</param>
  /// <returns><c>true</c> when the two etags compare equal; otherwise <c>false</c>.</returns>
  protected override bool IsUpToDateEnoughToWriteToDisk(Etag highestETag)
  {
      var matchesLatestEtag = false;

      context.Database.TransactionalStorage.Batch(
          accessor => matchesLatestEtag = accessor.Staleness.GetMostRecentDocumentEtag() == highestETag);

      return matchesLatestEtag;
  }
  /// <summary>
  /// Either stores a new commit point for this index or, when none is stored, records the
  /// deleted keys against the existing commit points.
  /// </summary>
  /// <param name="itemsInfo">Result of the write: highest etag, deleted keys and the commit point flag.</param>
  /// <param name="segmentsInfo">
  /// Segments info to store; when <c>null</c> the current segments info is read from the directory.
  /// </param>
  protected override void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo)
  {
      var storeCommitPoint = ShouldStoreCommitPoint(itemsInfo) && itemsInfo.HighestETag != null;

      if (storeCommitPoint == false)
      {
          // No commit point this time; deleted keys are only tracked for indexes that are not in RAM.
          if (itemsInfo.DeletedKeys != null && directory is RAMDirectory == false)
          {
              context.IndexStorage.AddDeletedKeysToCommitPoints(indexDefinition, itemsInfo.DeletedKeys);
          }

          return;
      }

      var commitPoint = new IndexCommitPoint
      {
          HighestCommitedETag = itemsInfo.HighestETag,
          TimeStamp = LastIndexTime,
          SegmentsInfo = segmentsInfo ?? IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory)
      };

      context.IndexStorage.StoreCommitPoint(indexId.ToString(), commitPoint);
      LastCommitPointStoreTime = SystemTime.UtcNow;
  }
  /// <summary>
  /// Decides whether a commit point should be stored after the current write.
  /// </summary>
  /// <param name="itemsInfo">Result of the write; its <c>DisableCommitPoint</c> flag vetoes storing.</param>
  /// <returns><c>true</c> when a commit point should be stored.</returns>
  private bool ShouldStoreCommitPoint(IndexedItemsInfo itemsInfo)
  {
      if (itemsInfo.DisableCommitPoint)
      {
          return false;
      }

      // There is no point in trying to store commit points for a RAM index.
      if (directory is RAMDirectory)
      {
          return false;
      }

      // Store when the time between the previous and the last indexing run exceeds the configured minimum interval...
      if (LastIndexTime - PreviousIndexTime > context.Configuration.MinIndexingTimeIntervalToStoreCommitPoint)
      {
          return true;
      }

      // ...or when the last stored commit point is older than the configured maximum interval.
      return LastIndexTime - LastCommitPointStoreTime > context.Configuration.MaxIndexCommitPointStoreTimeInterval;
  }
  /// <summary>
  /// Converts one map output into an <see cref="IndexingResult"/>, unwrapping a
  /// <c>BoostedValue</c> first when the output is one.
  /// </summary>
  /// <param name="doc">The map output, optionally wrapped in a <c>BoostedValue</c>.</param>
  /// <param name="anonymousObjectToLuceneDocumentConverter">Converter used to build the Lucene fields.</param>
  /// <param name="boost">Receives the boost of the wrapper, or 1 when the output is not boosted.</param>
  /// <returns>The extracted document id, fields and skip flag.</returns>
  private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
  {
      var wrapper = doc as BoostedValue;
      if (wrapper == null)
      {
          boost = 1;
      }
      else
      {
          doc = wrapper.Value;
          boost = wrapper.Boost;
      }

      IndexingResult result;
      var jsonDocument = doc as DynamicJsonObject;
      // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
      if (jsonDocument == null)
      {
          result = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
      }
      else
      {
          result = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, jsonDocument);
      }

      // When the boost differs from 1, norms are switched back on for every field.
      if (Math.Abs(boost - 1) > float.Epsilon)
      {
          foreach (var field in result.Fields)
          {
              field.OmitNorms = false;
          }
      }

      return result;
  }
  /// <summary>
  /// Outcome of converting a single map output: the document id it belongs to, the Lucene
  /// fields produced for it, and whether the output should be skipped.
  /// </summary>
  private class IndexingResult
  {
      /// <summary>Document id of the map output; <c>null</c> when none could be determined.</summary>
      public string NewDocId;

      /// <summary>When <c>true</c>, the caller does not write this output to the index.</summary>
      public bool ShouldSkip;

      /// <summary>Lucene fields created from the map output.</summary>
      public List<AbstractField> Fields;
  }
  /// <summary>
  /// Builds the indexing result for a map output that is a <c>DynamicJsonObject</c>.
  /// The document id is read from the root parent (or the object itself) and the fields are
  /// created from the object's inner JSON. Results from this overload are never marked as skipped.
  /// </summary>
  /// <exception cref="InvalidSpatialShapeException">
  /// Thrown, wrapping the original <c>InvalidShapeException</c> together with the document id,
  /// when field conversion hits an invalid shape.
  /// </exception>
  private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
  {
      var rawDocumentId = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();

      // A DynamicNullObject id means there is no id at all.
      string documentId = null;
      if ((rawDocumentId is DynamicNullObject) == false)
      {
          documentId = (string) rawDocumentId;
      }

      var result = new IndexingResult
      {
          NewDocId = documentId,
          ShouldSkip = false
      };

      try
      {
          var inner = ((IDynamicJsonObject)dynamicJsonObject).Inner;
          result.Fields = anonymousObjectToLuceneDocumentConverter.Index(inner, Field.Store.NO).ToList();
      }
      catch (InvalidShapeException e)
      {
          throw new InvalidSpatialShapeException(e, documentId);
      }

      return result;
  }
  /// <summary>
  /// Property descriptors per map output type, filled on demand by <c>GetDocumentIdByReflection</c>.
  /// </summary>
  private readonly ConcurrentDictionary<Type, PropertyDescriptorCollection> propertyDescriptorCache =
      new ConcurrentDictionary<Type, PropertyDescriptorCollection>();

  /// <summary>
  /// Builds the indexing result for a map output that is not a <c>DynamicJsonObject</c>.
  /// The document id and the property descriptors are obtained through
  /// <c>GetDocumentIdByReflection</c>, and the fields are created from those properties.
  /// </summary>
  /// <exception cref="InvalidSpatialShapeException">
  /// Thrown, wrapping the original <c>InvalidShapeException</c> together with the document id,
  /// when field conversion hits an invalid shape.
  /// </exception>
  private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
  {
      PropertyDescriptorCollection properties;
      var documentId = GetDocumentIdByReflection(doc, out properties);

      List<AbstractField> fields;
      try
      {
          fields = anonymousObjectToLuceneDocumentConverter.Index(doc, properties, Field.Store.NO).ToList();
      }
      catch (InvalidShapeException e)
      {
          throw new InvalidSpatialShapeException(e, documentId);
      }

      // We always have at least __document_id, so more than one property with no resulting
      // field means there is nothing to write for this output.
      var nothingToIndex = properties.Count > 1 && fields.Count == 0;

      return new IndexingResult
      {
          NewDocId = documentId,
          Fields = fields,
          ShouldSkip = nothingToIndex
      };
  }
  /// <summary>
  /// Reads the document id property (<c>Constants.DocumentIdFieldName</c>, matched
  /// case-sensitively) from <paramref name="doc"/> using its property descriptors.
  /// </summary>
  /// <param name="doc">The map output to inspect.</param>
  /// <param name="properties">
  /// Receives the property descriptors of the runtime type of <paramref name="doc"/>,
  /// taken from the per-type cache or created and cached on first use.
  /// </param>
  /// <returns>The property value as a string, or <c>null</c> when the value is not a string.</returns>
  private string GetDocumentIdByReflection(object doc, out PropertyDescriptorCollection properties)
  {
      properties = propertyDescriptorCache.GetOrAdd(doc.GetType(), TypeDescriptor.GetProperties);

      var idProperty = properties.Find(Constants.DocumentIdFieldName, false);
      return idProperty.GetValue(doc) as string;
  }
  /// <summary>
  /// Deletes the index entries of the given document keys and notifies the index update
  /// trigger batchers about each deletion.
  /// </summary>
  /// <remarks>
  /// The document id terms are lower-cased once and the same terms are used both for the trigger
  /// notification and for the actual delete, matching how <c>IndexDocuments</c> lower-cases the id
  /// when it writes, deletes and notifies. The batchers are disposed even when the notification
  /// or the delete throws.
  /// </remarks>
  /// <param name="keys">Document keys whose entries are removed.</param>
  /// <param name="context">Work context providing the index update triggers and error reporting.</param>
  public override void Remove(string[] keys, WorkContext context)
  {
      Write((writer, analyzer, stats) =>
      {
          stats.Operation = IndexingWorkStats.Status.Ignore;
          logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), indexId));

          var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
              .Where(x => x != null)
              .ToList();

          try
          {
              var terms = keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray();

              // Notify all triggers first, then delete, using the same lower-cased terms for both.
              foreach (var term in terms)
              {
                  InvokeOnIndexEntryDeletedOnAllBatchers(batchers, term);
              }

              writer.DeleteDocuments(terms);
          }
          finally
          {
              batchers.ApplyAndIgnoreAllErrors(
                  e =>
                  {
                      logIndexing.WarnException("Failed to dispose on index update trigger", e);
                      context.AddError(indexId, null, e.Message, "Dispose Trigger");
                  },
                  batcher => batcher.Dispose());
          }

          return new IndexedItemsInfo(GetLastEtagFromStats())
          {
              ChangedDocs = keys.Length,
              DeletedKeys = keys
          };
      });
  }
  /// <summary>
  /// For index recovery purposes: deletes the entries of the given keys straight from the
  /// index, without notifying any index update triggers.
  /// </summary>
  /// <param name="keys">Document keys whose entries are removed; they are lower-cased before the delete.</param>
  /// <param name="lastEtag">Etag reported as the highest etag of the resulting write.</param>
  internal void RemoveDirectlyFromIndex(string[] keys, Etag lastEtag)
  {
      Write((writer, analyzer, stats) =>
      {
          stats.Operation = IndexingWorkStats.Status.Ignore;

          var terms = (from key in keys
                       select new Term(Constants.DocumentIdFieldName, key.ToLowerInvariant())).ToArray();
          writer.DeleteDocuments(terms);

          // Just commit: do not create a commit point and do not add any info about deleted keys.
          var info = new IndexedItemsInfo(lastEtag);
          info.ChangedDocs = keys.Length;
          info.DisableCommitPoint = true;
          return info;
      });
  }
  351. }
  352. }