PageRenderTime 38ms CodeModel.GetById 20ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/ToMigrate/Raven.Database/Indexing/SimpleIndex.cs

http://github.com/ayende/ravendb
C# | 480 lines | 397 code | 71 blank | 12 comment | 38 complexity | 6b9ce420f7e305b8813339c5ea99c572 MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1//-----------------------------------------------------------------------
  2// <copyright file="SimpleIndex.cs" company="Hibernating Rhinos LTD">
  3//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4// </copyright>
  5//-----------------------------------------------------------------------
  6using System;
  7using System.Collections.Concurrent;
  8using System.Collections.Generic;
  9using System.ComponentModel;
 10using System.Diagnostics;
 11using System.Linq;
 12using System.Threading;
 13using Lucene.Net.Documents;
 14using Lucene.Net.Index;
 15using Lucene.Net.Store;
 16using Raven.Abstractions;
 17using Raven.Abstractions.Data;
 18using Raven.Abstractions.Exceptions;
 19using Raven.Abstractions.Indexing;
 20using Raven.Abstractions.Linq;
 21using Raven.Abstractions.Logging;
 22using Raven.Database.Extensions;
 23using Raven.Database.Linq;
 24using Raven.Database.Storage;
 25using Raven.Database.Util;
 26using Spatial4n.Core.Exceptions;
 27
 28namespace Raven.Database.Indexing
 29{
 30    internal class SimpleIndex : Index
 31    {
        /// <summary>
        /// Creates a simple (map-only) index over the given Lucene directory.
        /// All state handling is delegated to the <see cref="Index"/> base class.
        /// </summary>
        public SimpleIndex(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
            : base(directory, id, indexDefinition, viewGenerator, context)
        {
        }
 36
 37        public override bool IsMapReduce
 38        {
 39            get { return false; }
 40        }
 41
 42        public DateTime LastCommitPointStoreTime { get; private set; }
 43
 44        public override IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token)
 45        {
 46            token.ThrowIfCancellationRequested();
 47
 48            var count = 0;
 49            var sourceCount = 0;
 50            var writeToIndexStats = new List<PerformanceStats>();
 51
 52            IndexingPerformanceStats performance = null;
 53            var performanceStats = new List<BasePerformanceStats>();
 54
 55            var storageCommitDuration = new Stopwatch();
 56
 57            actions.BeforeStorageCommit += storageCommitDuration.Start;
 58
 59            actions.AfterStorageCommit += () =>
 60            {
 61                storageCommitDuration.Stop();
 62
 63                performanceStats.Add(PerformanceStats.From(IndexingOperation.StorageCommit, storageCommitDuration.ElapsedMilliseconds));
 64            };
 65
 66            Write((indexWriter, analyzer, stats) =>
 67            {
 68                var processedKeys = new HashSet<string>();
 69                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
 70                    .Where(x => x != null)
 71                    .ToList();
 72
 73                try
 74                {
 75                    performance = RecordCurrentBatch("Current", "Index", batch.Docs.Count);
 76
 77                    var deleteExistingDocumentsDuration = new Stopwatch();
 78
 79                        Interlocked.Increment(ref sourceCount);
 80                    var docIdTerm = new Term(Constants.DocumentIdFieldName);
 81                    var documentsWrapped = batch.Docs.Select((doc, i) =>
 82                    {
 83                        token.ThrowIfCancellationRequested();
 84                        if (doc.__document_id == null)
 85                            throw new ArgumentException(
 86                                string.Format("Cannot index something which doesn't have a document id, but got: '{0}'", doc));
 87
 88                        string documentId = doc.__document_id.ToString();
 89                        if (processedKeys.Add(documentId) == false)
 90                            return doc;
 91
 92                        InvokeOnIndexEntryDeletedOnAllBatchers(batchers, docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
 93
 94                        if (batch.SkipDeleteFromIndex[i] == false ||
 95                            context.ShouldRemoveFromIndex(documentId)) // maybe it is recently deleted?
 96                        {
 97                            using (StopwatchScope.For(deleteExistingDocumentsDuration))
 98                            {
 99                                indexWriter.DeleteDocuments(docIdTerm.CreateTerm(documentId.ToLowerInvariant()));
100                            }
101                        }
102
103                        return doc;
104                    })
105                    .Where(x => x is FilteredDocument == false)
106                    .ToList();
107
108                    performanceStats.Add(new PerformanceStats
109                    {
110                        Name = IndexingOperation.Lucene_DeleteExistingDocument,
111                        DurationMs = deleteExistingDocumentsDuration.ElapsedMilliseconds
112                    });
113
114                    var allReferencedDocs = new ConcurrentQueue<IDictionary<string, HashSet<string>>>();
115                    var allReferenceEtags = new ConcurrentQueue<IDictionary<string, Etag>>();
116
117                    var parallelOperations = new ConcurrentQueue<ParallelBatchStats>();
118
119                    var parallelProcessingStart = SystemTime.UtcNow;
120                    context.Database.MappingThreadPool.ExecuteBatch(documentsWrapped, (IEnumerator<dynamic> partition) =>
121                    {
122                        token.ThrowIfCancellationRequested();
123                        var parallelStats = new ParallelBatchStats
124                        {
125                            StartDelay = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds
126                        };
127
128                        var anonymousObjectToLuceneDocumentConverter = new AnonymousObjectToLuceneDocumentConverter(context.Database, indexDefinition, viewGenerator, logIndexing);
129                        var luceneDoc = new Document();
130                        var documentIdField = new Field(Constants.DocumentIdFieldName, "dummy", Field.Store.YES,
131                                                        Field.Index.NOT_ANALYZED_NO_NORMS);
132
133                        using (CurrentIndexingScope.Current = new CurrentIndexingScope(context.Database, PublicName))
134                        {
135                            string currentDocId = null;
136                            int outputPerDocId = 0;
137                            Action<Exception, object> onErrorFunc;
138                            bool skipDocument = false;
139
140                            var linqExecutionDuration = new Stopwatch();
141                            var addDocumentDutation = new Stopwatch();
142                            var convertToLuceneDocumentDuration = new Stopwatch();
143
144                            foreach (var doc in RobustEnumerationIndex(partition, viewGenerator.MapDefinitions, stats, out onErrorFunc, linqExecutionDuration))
145                            {
146                                token.ThrowIfCancellationRequested();
147
148                                float boost;
149                                IndexingResult indexingResult;
150                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
151                                {
152                                    try
153                                    {
154
155                                        indexingResult = GetIndexingResult(doc, anonymousObjectToLuceneDocumentConverter, out boost);
156                                    }
157                                    catch (Exception e)
158                                    {
159                                        onErrorFunc(e, doc);
160                                        continue;
161                                    }
162                                }
163
164                                // ReSharper disable once RedundantBoolCompare --> code clarity
165                                if (indexingResult.NewDocId == null || indexingResult.ShouldSkip != false)
166                                {
167                                    continue;
168                                }
169                                if (currentDocId != indexingResult.NewDocId)
170                                {
171                                    currentDocId = indexingResult.NewDocId;
172                                    outputPerDocId = 0;
173                                    skipDocument = false;
174                                }
175                                if (skipDocument)
176                                    continue;
177                                outputPerDocId++;
178                                if (EnsureValidNumberOfOutputsForDocument(currentDocId, outputPerDocId) == false)
179                                {
180                                    skipDocument = true;
181                                    continue;
182                                }
183                                Interlocked.Increment(ref count);
184
185                                using (StopwatchScope.For(convertToLuceneDocumentDuration))
186                                {
187                                    luceneDoc.GetFields().Clear();
188                                    luceneDoc.Boost = boost;
189                                    documentIdField.SetValue(indexingResult.NewDocId.ToLowerInvariant());
190                                    luceneDoc.Add(documentIdField);
191                                    foreach (var field in indexingResult.Fields)
192                                    {
193                                        luceneDoc.Add(field);
194                                    }
195                                }
196
197                                batchers.ApplyAndIgnoreAllErrors(
198                                    exception =>
199                                    {
200                                        logIndexing.WarnException(
201                                        string.Format(
202                                            "Error when executed OnIndexEntryCreated trigger for index '{0}', key: '{1}'",
203                                            PublicName, indexingResult.NewDocId),
204                                            exception);
205                                        context.AddError(
206                                            indexId,
207                                            PublicName,
208                                            indexingResult.NewDocId,
209                                            exception,
210                                            "OnIndexEntryCreated Trigger");
211                                    },
212                                    trigger => trigger.OnIndexEntryCreated(indexingResult.NewDocId, luceneDoc));
213                                LogIndexedDocument(indexingResult.NewDocId, luceneDoc);
214
215                                using (StopwatchScope.For(addDocumentDutation))
216                                {
217                                    AddDocumentToIndex(indexWriter, luceneDoc, analyzer);
218                                }
219
220                                Interlocked.Increment(ref stats.IndexingSuccesses);
221                            }
222                            allReferenceEtags.Enqueue(CurrentIndexingScope.Current.ReferencesEtags);
223                            allReferencedDocs.Enqueue(CurrentIndexingScope.Current.ReferencedDocuments);
224
225                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.LoadDocument, CurrentIndexingScope.Current.LoadDocumentDuration.ElapsedMilliseconds));
226                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Linq_MapExecution, linqExecutionDuration.ElapsedMilliseconds));
227                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_ConvertToLuceneDocument, convertToLuceneDocumentDuration.ElapsedMilliseconds));
228                            parallelStats.Operations.Add(PerformanceStats.From(IndexingOperation.Lucene_AddDocument, addDocumentDutation.ElapsedMilliseconds));
229
230                            parallelOperations.Enqueue(parallelStats);
231                        }
232                    }, description: string.Format("Mapping index {0} from Etag {1} to Etag {2}", this.PublicName, this.GetLastEtagFromStats(), batch.HighestEtagBeforeFiltering));
233
234                    performanceStats.Add(new ParallelPerformanceStats
235                    {
236                        NumberOfThreads = parallelOperations.Count,
237                        DurationMs = (long)(SystemTime.UtcNow - parallelProcessingStart).TotalMilliseconds,
238                        BatchedOperations = parallelOperations.ToList()
239                    });
240
241                    var updateDocumentReferencesDuration = new Stopwatch();
242                    using (StopwatchScope.For(updateDocumentReferencesDuration))
243                    {
244                        UpdateDocumentReferences(actions, allReferencedDocs, allReferenceEtags);
245                    }
246                    performanceStats.Add(PerformanceStats.From(IndexingOperation.UpdateDocumentReferences, updateDocumentReferencesDuration.ElapsedMilliseconds));
247                }
248                catch (Exception e)
249                {
250                    batchers.ApplyAndIgnoreAllErrors(
251                        ex =>
252                        {
253                            logIndexing.WarnException("Failed to notify index update trigger batcher about an error in " + PublicName, ex);
254                            context.AddError(indexId, PublicName, null, ex, "AnErrorOccured Trigger");
255                        },
256                        x => x.AnErrorOccured(e));
257                    throw;
258                }
259                finally
260                {
261                    batchers.ApplyAndIgnoreAllErrors(
262                        e =>
263                        {
264                            logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
265                            context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
266                        },
267                        x => x.Dispose());
268                }
269                return new IndexedItemsInfo(batch.HighestEtagBeforeFiltering)
270                {
271                    ChangedDocs = sourceCount
272                };
273            }, writeToIndexStats);
274
275            performanceStats.AddRange(writeToIndexStats);
276
277            InitializeIndexingPerformanceCompleteDelegate(performance, sourceCount, count, performanceStats);
278
279            if (logIndexing.IsDebugEnabled)
280            logIndexing.Debug("Indexed {0} documents for {1}", count, PublicName);
281
282            return performance;
283        }
284
285        private void InitializeIndexingPerformanceCompleteDelegate(IndexingPerformanceStats performance, int sourceCount, int count, List<BasePerformanceStats> performanceStats)
286        {
287            performance.OnCompleted = () => BatchCompleted("Current", "Index", sourceCount, count, performanceStats);
288        }
289
290        protected override bool IsUpToDateEnoughToWriteToDisk(Etag highestETag)
291        {
292            bool upToDate = false;
293            context.Database.TransactionalStorage.Batch(accessor =>
294            {
295                upToDate = accessor.Staleness.GetMostRecentDocumentEtag() == highestETag;
296            });
297            return upToDate;
298        }
299
        /// <summary>
        /// Persists a commit point (highest committed etag + segment info) when the storing
        /// policy allows it; otherwise, for disk-backed indexes, appends the batch's deleted
        /// keys to the existing commit points.
        /// </summary>
        protected override void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo)
        {
            // NOTE(review): this logs a routine progress message at Error level - looks like a
            // debugging leftover; consider lowering to Debug. Left unchanged to avoid altering
            // runtime logging behavior.
            logIndexing.Error("HandlingCommitPoint for index {0} in DB {1}", this.PublicName, this.context.DatabaseName);
            if (ShouldStoreCommitPoint(itemsInfo) && itemsInfo.HighestETag != null)
            {
                context.IndexStorage.StoreCommitPoint(indexId.ToString(), new IndexCommitPoint
                {
                    HighestCommitedETag = itemsInfo.HighestETag,
                    TimeStamp = LastIndexTime,
                    // fall back to reading the segments info straight from the directory
                    SegmentsInfo = segmentsInfo ?? IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory)
                });

                LastCommitPointStoreTime = SystemTime.UtcNow;
            }
            else if (itemsInfo.DeletedKeys != null && directory is RAMDirectory == false)
            {
                context.IndexStorage.AddDeletedKeysToCommitPoints(indexDefinition, itemsInfo.DeletedKeys);
            }
        }
319
320        private bool ShouldStoreCommitPoint(IndexedItemsInfo itemsInfo)
321        {
322            if (itemsInfo.DisableCommitPoint)
323                return false;
324
325            if (directory is RAMDirectory) // no point in trying to store commits for ram index
326                return false;
327            // no often than specified indexing interval
328            return (LastIndexTime - PreviousIndexTime > context.Configuration.Indexing.MinIndexingIntervalToStoreCommitPoint.AsTimeSpan ||
329                // at least once for specified time interval
330                    LastIndexTime - LastCommitPointStoreTime > context.Configuration.Indexing.MaxIndexCommitPointStoreInterval.AsTimeSpan);
331        }
332
333        private IndexingResult GetIndexingResult(object doc, AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, out float boost)
334        {
335            boost = 1;
336
337            var boostedValue = doc as BoostedValue;
338            if (boostedValue != null)
339            {
340                doc = boostedValue.Value;
341                boost = boostedValue.Boost;
342            }
343
344            IndexingResult indexingResult;
345
346            var docAsDynamicJsonObject = doc as DynamicJsonObject;
347
348            // ReSharper disable once ConvertIfStatementToConditionalTernaryExpression
349            if (docAsDynamicJsonObject != null)
350                indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, docAsDynamicJsonObject);
351            else
352                indexingResult = ExtractIndexDataFromDocument(anonymousObjectToLuceneDocumentConverter, doc);
353
354            if (Math.Abs(boost - 1) > float.Epsilon)
355            {
356                foreach (var abstractField in indexingResult.Fields)
357                {
358                    abstractField.OmitNorms = false;
359                }
360            }
361
362            return indexingResult;
363        }
364
        // The outcome of mapping a single document.
        private class IndexingResult
        {
            public string NewDocId;           // document id emitted by the map (null when none was produced)
            public List<AbstractField> Fields; // Lucene fields produced by the converter
            public bool ShouldSkip;           // when true, the entry is not written to the index
        }
371
372        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, DynamicJsonObject dynamicJsonObject)
373        {
374            var newDocIdAsObject = dynamicJsonObject.GetRootParentOrSelf().GetDocumentId();
375            var newDocId = newDocIdAsObject is DynamicNullObject ? null : (string)newDocIdAsObject;
376            List<AbstractField> abstractFields;
377
378            try
379            {
380                abstractFields = anonymousObjectToLuceneDocumentConverter.Index(((IDynamicJsonObject)dynamicJsonObject).Inner, Field.Store.NO).ToList();
381            }
382            catch (InvalidShapeException e)
383            {
384                throw new InvalidSpatialShapException(e, newDocId);
385            }
386
387            return new IndexingResult
388            {
389                Fields = abstractFields,
390                NewDocId = newDocId,
391                ShouldSkip = false
392            };
393        }
394
395        private readonly ConcurrentDictionary<Type, PropertyAccessor> propertyAccessorCache = new ConcurrentDictionary<Type, PropertyAccessor>();
396
397        private IndexingResult ExtractIndexDataFromDocument(AnonymousObjectToLuceneDocumentConverter anonymousObjectToLuceneDocumentConverter, object doc)
398        {
399            PropertyAccessor propertyAccessor;
400            var newDocId = GetDocumentId(doc, out propertyAccessor);
401
402            List<AbstractField> abstractFields;
403            try
404            {
405                abstractFields = anonymousObjectToLuceneDocumentConverter.Index(doc, propertyAccessor, Field.Store.NO).ToList();
406            }
407            catch (InvalidShapeException e)
408            {
409                throw new InvalidSpatialShapException(e, newDocId);
410            }
411
412            return new IndexingResult
413            {
414                Fields = abstractFields,
415                NewDocId = newDocId,
416                ShouldSkip = propertyAccessor.Properies.Count > 1  // we always have at least __document_id
417                            && abstractFields.Count == 0
418            };
419        }
420
421        private string GetDocumentId(object doc, out PropertyAccessor accessor)
422        {
423            Type type = doc.GetType();
424            accessor = propertyAccessorCache.GetOrAdd(type, PropertyAccessor.Create);
425            return accessor.GetValue(Constants.DocumentIdFieldName, doc) as string;
426        }
427
428        public override void Remove(string[] keys, WorkContext context)
429        {
430            Write((writer, analyzer, stats) =>
431            {
432                stats.Operation = IndexingWorkStats.Status.Ignore;
433                if (logIndexing.IsDebugEnabled)
434                logIndexing.Debug(() => string.Format("Deleting ({0}) from {1}", string.Join(", ", keys), PublicName));
435
436                var batchers = context.IndexUpdateTriggers.Select(x => x.CreateBatcher(indexId))
437                    .Where(x => x != null)
438                    .ToList();
439
440                keys.Apply(
441                    key =>
442                    InvokeOnIndexEntryDeletedOnAllBatchers(batchers, new Term(Constants.DocumentIdFieldName, key.ToLowerInvariant())));
443
444                writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
445                batchers.ApplyAndIgnoreAllErrors(
446                    e =>
447                    {
448                        logIndexing.WarnException("Failed to dispose on index update trigger in " + PublicName, e);
449                        context.AddError(indexId, PublicName, null, e, "Dispose Trigger");
450                    },
451                    batcher => batcher.Dispose());
452
453                return new IndexedItemsInfo(GetLastEtagFromStats())
454                {
455                    ChangedDocs = keys.Length,
456                    DeletedKeys = keys
457                };
458            });
459        }
460
461        /// <summary>
462        /// For index recovery purposes
463        /// </summary>
464        internal void RemoveDirectlyFromIndex(string[] keys, Etag lastEtag)
465        {
466            Write((writer, analyzer, stats) =>
467            {
468                stats.Operation = IndexingWorkStats.Status.Ignore;
469
470                writer.DeleteDocuments(keys.Select(k => new Term(Constants.DocumentIdFieldName, k.ToLowerInvariant())).ToArray());
471
472                return new IndexedItemsInfo(lastEtag) // just commit, don't create commit point and add any infor about deleted keys
473                {
474                    ChangedDocs = keys.Length,
475                    DisableCommitPoint = true
476                };
477            });
478        }
479    }
480}