
/ToMigrate/Raven.Database/Indexing/SimpleIndex.cs

http://github.com/ayende/ravendb
C# | 480 lines | 397 code | 71 blank | 12 comment | 38 complexity | 6b9ce420f7e305b8813339c5ea99c572 MD5
  1//-----------------------------------------------------------------------
  2// <copyright file="SimpleIndex.cs" company="Hibernating Rhinos LTD">
  3//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4// </copyright>
  5//-----------------------------------------------------------------------
  6using System;
  7using System.Collections.Concurrent;
  8using System.Collections.Generic;
  9using System.ComponentModel;
 10using System.Diagnostics;
 11using System.Linq;
 12using System.Threading;
 13using Lucene.Net.Documents;
 14using Lucene.Net.Index;
 15using Lucene.Net.Store;
 16using Raven.Abstractions;
 17using Raven.Abstractions.Data;
 18using Raven.Abstractions.Exceptions;
 19using Raven.Abstractions.Indexing;
 20using Raven.Abstractions.Linq;
 21using Raven.Abstractions.Logging;
 22using Raven.Database.Extensions;
 23using Raven.Database.Linq;
 24using Raven.Database.Storage;
 25using Raven.Database.Util;
 26using Spatial4n.Core.Exceptions;
 27
 28namespace Raven.Database.Indexing
 29{
 30    internal class SimpleIndex : Index
 31    {
 32        public SimpleIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
 33            : base(directory, id, indexDefinition, viewGenerator, context)
 34        {
 35        }
 36
 37        public override bool IsMapReduce
 38        {
 39            get { return false; }
 40        }
 41
 42        public DateTime LastCommitPointStoreTime { get; private set; }
 43
 44        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
 45        {
 46            token.ThrowIfCancellationRequested();
 47
 48            var count = 0;
 49            var sourceCount = 0;
 50            var writeToIndexStats = new List<PerformanceStats>();
 51
 52            IndexingPerformanceStats performance = null;
 53            var performanceStats = new List<BasePerformanceStats>();
 54
 55            var storageCommitDuration = new Stopwatch();
 56
 57            actions.BeforeStorageCommit += storageCommitDuration.Start;
 58
 59            actions.AfterStorageCommit += () =>
 60            {
 61                storageCommitDuration.Stop();
 62
 63                performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
 64            };
 65
 66            Write((indexWriter, analyzer, stats) =>
 67            {
 68                var processedKeys = new HashSet<string>();
 69                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
 70                    .Where(x => x != null)
 71                    .ToList();
 72
 73                try
 74                {
 75                    performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);
 76
 77                    var deleteExistingDocumentsDuration = new Stopwatch();
 78
 79                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
 80                    var documentsWrapped = batch.Docs.Select((doc, i) =>
 81                    {
 82                        token.ThrowIfCancellationRequested();
 83                        Interlocked.Increment(ref sourceCount); // count every incoming source document
 84                        if (doc.__document_id == null)
 85                            throw new ArgumentException(
 86                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
 87
 88                        string documentId = doc.__document_id.ToString();
 89                        if (processedKeys.Add(documentId) == false)
 90                            return doc;
 91
 92                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
 93
 94                        if (batch.SkipDeleteFromIndex[i] == false ||
 95                            context.ShouldRemoveFromIndex(documentId)) // maybe it was recently deleted?
 96                        {
 97                            using (StopwatchScope.For(deleteExistingDocumentsDuration))
 98                            {
 99                                indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
100                            }
101                        }
102
103                        return doc;
104                    })
105                    .Where(x => x is FilteredDocument == false)
106                    .ToList();
107
108                    performanceStats.Add(new PerformanceStats
109                    {
110                        Name = IndexingOperation.Lucene_DeleteExistingDocument,
111                        DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
112                    });
113
114                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
115                    var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
116
117                    var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();
118
119                    var parallelProcessingStart = SystemTime.UtcNow;
120                    context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator<dynamic> partition) =>
121                    {
122                        token.ThrowIfCancellationRequested();
123                        var parallelStats = new ParallelBatchStats
124                        {
125                            StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
126                        };
127
128                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
129                        var luceneDoc = new Document();
130                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
131                                                        Field.Index.NOT_ANALYZED_NO_NORMS);
132
133                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
134                        {
135                            string currentDocId = null;
136                            int outputPerDocId = 0;
137                            Action<Exception, object> onErrorFunc;
138                            bool skipDocument = false;
139
140                            var linqExecutionDuration = new Stopwatch();
141                            var addDocumentDuration = new Stopwatch();
142                            var convertToLuceneDocumentDuration = new Stopwatch();
143
144                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
145                            {
146                                token.ThrowIfCancellationRequested();
147
148                                float boost;
149                                IndexingResult indexingResult;
150                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
151                                {
152                                    try
153                                    {
154
155                                        indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
156                                    }
157                                    catch (Exception e)
158                                    {
159                                        onErrorFunc(e, doc);
160                                        continue;
161                                    }
162                                }
163
164                                // ReSharper disable once RedundantBoolCompare --> code clarity
165                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
166                                {
167                                    continue;
168                                }
169                                if (currentDocId != indexingResult.NewDocId)
170                                {
171                                    currentDocId = indexingResult.NewDocId;
172                                    outputPerDocId = 0;
173                                    skipDocument = false;
174                                }
175                                if (skipDocument)
176                                    continue;
177                                outputPerDocId++;
178                                if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
179                                {
180                                    skipDocument = true;
181                                    continue;
182                                }
183                                Interlocked.Increment(ref count);
184
185                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
186                                {
187                                    luceneDoc.GetFields().Clear();
188                                    luceneDoc.Boost = boost;
189                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
190                                    luceneDoc.Add(documentIdField);
191                                    foreach (var field in indexingResult.Fields)
192                                    {
193                                        luceneDoc.Add(field);
194                                    }
195                                }
196
197                                batchers.ApplyAndIgnoreAllErrors(
198                                    exception =>
199                                    {
200                                        logIndexing.WarnException(
201                                        string.Format(
202                                            "Error when executing OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
203                                            PublicName, indexingResult.NewDocId),
204                                            exception);
205                                        context.AddError(
206                                            indexId,
207                                            PublicName,
208                                            indexingResult.NewDocId,
209                                            exception,
210                                            "OnIndexEntryCreated Trigger");
211                                    },
212                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
213                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
214
215                                using (StopwatchScope.For(addDocumentDuration))
216                                {
217                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
218                                }
219
220                                Interlocked.Increment(ref stats.IndexingSuccesses);
221                            }
222                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
223                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
224
225                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
226                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
227                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
228                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDuration.ElapsedMilliseconds));
229
230                            parallelOperations.Enqueue(parallelStats);
231                        }
232                    }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));
233
234                    performanceStats.Add(new ParallelPerformanceStats
235                    {
236                        NumberOfThreads = parallelOperations.Count,
237                        DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
238                        BatchedOperations = parallelOperations.ToList()
239                    });
240
241                    var updateDocumentReferencesDuration = new Stopwatch();
242                    using (StopwatchScope.For(updateDocumentReferencesDuration))
243                    {
244                        UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
245                    }
246                    performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
247                }
248                catch (Exception e)
249                {
250                    batchers.ApplyAndIgnoreAllErrors(
251                        ex =>
252                        {
253                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
254                            context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
255                        },
256                        x => x.AnErrorOccured(e));
257                    throw;
258                }
259                finally
260                {
261                    batchers.ApplyAndIgnoreAllErrors(
262                        e =>
263                        {
264                            logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
265                            context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
266                        },
267                        x => x.Dispose());
268                }
269                return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
270                {
271                    ChangedDocs = sourceCount
272                };
273            }, writeToIndexStats);
274
275            performanceStats.AddRange(writeToIndexStats);
276
277            InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);
278
279            if (logIndexing.IsDebugEnabled)
280                logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
281
282            return performance;
283        }
284
285        private void InitializeIndexingPerformanceCompleteDelegate(IndexingPerformanceStats performance, int sourceCount, int count, List<BasePerformanceStats> performanceStats)
286        {
287            performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);
288        }
289
290        protected override bool IsUpToDateEnoughToWriteToDisk(Etag highestETag)
291        {
292            bool upToDate = false;
293            context.Database.TransactionalStorage.Batch(accessor =>
294            {
295                upToDate = accessor.Staleness.GetMostRecentDocumentEtag() == highestETag;
296            });
297            return upToDate;
298        }
299
300        protected override void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo)
301        {
302            logIndexing.Error("HandlingCommitPoint for index {0} in DB {1}", this.PublicName, this.context.DatabaseName);
303            if (ShouldStoreCommitPoint(itemsInfo) && itemsInfo.HighestETag != null)
304            {
305                context.IndexStorage.StoreCommitPoint(indexId.ToString(), new IndexCommitPoint
306                {
307                    HighestCommitedETag = itemsInfo.HighestETag,
308                    TimeStamp = LastIndexTime,
309                    SegmentsInfo = segmentsInfo ?? IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory)
310                });
311
312                LastCommitPointStoreTime = SystemTime.UtcNow;
313            }
314            else if (itemsInfo.DeletedKeys != null && directory is RAMDirectory == false)
315            {
316                context.IndexStorage.AddDeletedKeysToCommitPoints(indexDefinition, itemsInfo.DeletedKeys);
317            }
318        }
319
320        private bool ShouldStoreCommitPoint(IndexedItemsInfo itemsInfo)
321        {
322            if (itemsInfo.DisableCommitPoint)
323                return false;
324
325            if (directory is RAMDirectory) // no point in trying to store commits for ram index
326                return false;
327            // no more often than the specified indexing interval
328            return (LastIndexTime - PreviousIndexTime > context.Configuration.Indexing.MinIndexingIntervalToStoreCommitPoint.AsTimeSpan ||
329                // at least once for specified time interval
330                    LastIndexTime - LastCommitPointStoreTime > context.Configuration.Indexing.MaxIndexCommitPointStoreInterval.AsTimeSpan);
331        }
332
333        private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
334        {
335            boost = 1;
336
337            var boostedValue = doc as BoostedValue;
338            if (boostedValue != null)
339            {
340                doc = boostedValue.Value;
341                boost = boostedValue.Boost;
342            }
343
344            IndexingResult indexingResult;
345
346            var docAsDynamicJsonObject = doc as DynamicJsonObject;
347
348            // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
349            if (docAsDynamicJsonObject != null)
350                indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, docAsDynamicJsonObject);
351            else
352                indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
353
354            if (Math.Abs(boost - 1) > float.Epsilon)
355            {
356                foreach (var abstractField in indexingResult.Fields)
357                {
358                    abstractField.OmitNorms = false;
359                }
360            }
361
362            return indexingResult;
363        }
364
365        private class IndexingResult
366        {
367            public string NewDocId;
368            public List<AbstractField> Fields;
369            public bool ShouldSkip;
370        }
371
372        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
373        {
374            var newDocIdAsObject = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();
375            var newDocId = newDocIdAsObject is DynamicNullObject ? null : (string)newDocIdAsObject;
376            List<AbstractField> abstractFields;
377
378            try
379            {
380                abstractFields = anonymousObjectToLuceneDocumentConverter.Index(((IDynamicJsonObject)dynamicJsonObject).Inner, Field.Store.NO).ToList();
381            }
382            catch (InvalidShapeException e)
383            {
384                throw new InvalidSpatialShapException(e, newDocId);
385            }
386
387            return new IndexingResult
388            {
389                Fields = abstractFields,
390                NewDocId = newDocId,
391                ShouldSkip = false
392            };
393        }
394
395        private readonly ConcurrentDictionary<Type, PropertyAccessor> propertyAccessorCache = new ConcurrentDictionary<Type, PropertyAccessor>();
396
397        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
398        {
399            PropertyAccessor propertyAccessor;
400            var newDocId = GetDocumentId(doc, out propertyAccessor);
401
402            List<AbstractField> abstractFields;
403            try
404            {
405                abstractFields = anonymousObjectToLuceneDocumentConverter.Index(doc, propertyAccessor, Field.Store.NO).ToList();
406            }
407            catch (InvalidShapeException e)
408            {
409                throw new InvalidSpatialShapException(e, newDocId);
410            }
411
412            return new IndexingResult
413            {
414                Fields = abstractFields,
415                NewDocId = newDocId,
416                ShouldSkip = propertyAccessor.Properies.Count > 1  // we always have at least __document_id
417                            && abstractFields.Count == 0
418            };
419        }
420
421        private string GetDocumentId(object doc, out PropertyAccessor accessor)
422        {
423            Type type = doc.GetType();
424            accessor = propertyAccessorCache.GetOrAdd(type, PropertyAccessor.Create);
425            return accessor.GetValue(Constants.DocumentIdFieldName, doc) as string;
426        }
427
428        public override void Remove(string[] keys, WorkContext context)
429        {
430            Write((writer, analyzer, stats) =>
431            {
432                stats.Operation = IndexingWorkStats.Status.Ignore;
433                if (logIndexing.IsDebugEnabled)
434                    logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), PublicName));
435
436                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
437                    .Where(x => x != null)
438                    .ToList();
439
440                keys.Apply(
441                    key =>
442                    InvokeOnIndexEntryDeletedOnAllBatchers(batchers, new Term(Constants.DocumentIdFieldName, key.ToLowerInvariant())));
443
444                writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
445                batchers.ApplyAndIgnoreAllErrors(
446                    e =>
447                    {
448                        logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
449                        context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
450                    },
451                    batcher => batcher.Dispose());
452
453                return new IndexedItemsInfo(GetLastEtagFromStats())
454                {
455                    ChangedDocs = keys.Length,
456                    DeletedKeys = keys
457                };
458            });
459        }
460
461        /// <summary>
462        /// For index recovery purposes
463        /// </summary>
464        internal void RemoveDirectlyFromIndex(string[] keys, Etag lastEtag)
465        {
466            Write((writer, analyzer, stats) =>
467            {
468                stats.Operation = IndexingWorkStats.Status.Ignore;
469
470                writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
471
472                return new IndexedItemsInfo(lastEtag) // just commit; don't create a commit point or add any info about deleted keys
473                {
474                    ChangedDocs = keys.Length,
475                    DisableCommitPoint = true
476                };
477            });
478        }
479    }
480}
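
Note on the indexing pattern above: IndexDocuments recycles a single Lucene Document and a single __document_id Field per worker thread (clearing the field collection between map outputs instead of allocating fresh objects), and it deletes any existing index entries for the lowercased document id before re-adding them. Below is a minimal, self-contained sketch of that delete-then-add reuse pattern, assuming the Lucene.Net 3.x API; the field names and sample ids are illustrative assumptions, not RavenDB definitions.

// Sketch only: shows the Document/Field reuse and delete-then-add flow used in SimpleIndex,
// written against Lucene.Net 3.0.x. The "__document_id" / "Name" fields and sample ids below
// are assumptions for illustration.
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;

internal static class DocumentReuseSketch
{
    public static void Run()
    {
        var directory = new RAMDirectory();
        var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

        using (var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            // Reused instances, mirroring luceneDoc / documentIdField above.
            var luceneDoc = new Document();
            var idField = new Field("__document_id", "dummy", Field.Store.YES,
                                    Field.Index.NOT_ANALYZED_NO_NORMS);

            foreach (var id in new[] { "users/1", "users/2", "users/1" })
            {
                var loweredId = id.ToLowerInvariant();

                // Remove any previous entry for this document id, then re-add it.
                writer.DeleteDocuments(new Term("__document_id", loweredId));

                // Recycle the Document: clear old fields, update the id field, add the new fields.
                luceneDoc.GetFields().Clear();
                idField.SetValue(loweredId);
                luceneDoc.Add(idField);
                luceneDoc.Add(new Field("Name", "value for " + id, Field.Store.NO, Field.Index.ANALYZED));

                writer.AddDocument(luceneDoc, analyzer);
            }

            writer.Commit();
        }
    }
}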