PageRenderTime 10ms CodeModel.GetById 3ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 1ms

/ToMigrate/Raven.Database/Indexing/IndexStorage.cs

Relevant Search: With Applications for Solr and Elasticsearch

'Chapter 4. Taming tokens'. If you want to know how to extract ideas rather than words this book is for you. Learn concepts of precision and recall, making trade-offs between them and controlling the specificity of matches. Amazon Affiliate Link
http://github.com/ayende/ravendb
C# | 1691 lines | 1359 code | 285 blank | 47 comment | 242 complexity | 5b0b2dda17805dbd210707531b5ba65d MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1//-----------------------------------------------------------------------
   2// <copyright file="IndexStorage.cs" company="Hibernating Rhinos LTD">
   3//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
   4// </copyright>
   5//-----------------------------------------------------------------------
   6using System;
   7using System.Collections.Concurrent;
   8using System.Collections.Generic;
   9using System.ComponentModel.Composition;
  10using System.Diagnostics;
  11using System.Globalization;
  12using System.IO;
  13using System.Linq;
  14using System.Runtime.ConstrainedExecution;
  15using System.Text;
  16using System.Threading;
  17using System.Threading.Tasks;
  18using Lucene.Net.Analysis;
  19using Lucene.Net.Index;
  20using Lucene.Net.Search;
  21using Lucene.Net.Store;
  22using Lucene.Net.Util;
  23using Raven.Abstractions;
  24using Raven.Abstractions.Data;
  25using Raven.Abstractions.Extensions;
  26using Raven.Abstractions.Indexing;
  27using Raven.Abstractions.Logging;
  28using Raven.Abstractions.MEF;
  29using Raven.Abstractions.Util;
  30using Raven.Database.Actions;
  31using Raven.Database.Config;
  32using Raven.Database.Data;
  33using Raven.Database.Extensions;
  34using Raven.Database.Impl;
  35using Raven.Database.Linq;
  36using Raven.Database.Plugins;
  37using Raven.Database.Queries;
  38using Raven.Database.Storage;
  39using Raven.Database.Util;
  40using Raven.Imports.Newtonsoft.Json;
  41using Raven.Imports.Newtonsoft.Json.Linq;
  42using Raven.Json.Linq;
  43using Constants = Raven.Abstractions.Data.Constants;
  44using Directory = Lucene.Net.Store.Directory;
  45
  46namespace Raven.Database.Indexing
  47{
  48    /// <summary>
  49    /// 	Thread safe, single instance for the entire application
  50    /// </summary>
  51    public class IndexStorage : CriticalFinalizerObject, IDisposable
  52    {
  53        private readonly DocumentDatabase documentDatabase;
  54        private const string IndexVersion = "2.0.0.1";
  55        private const string MapReduceIndexVersion = "2.5.0.1";
  56
  57        private readonly IndexDefinitionStorage indexDefinitionStorage;
  58        private readonly RavenConfiguration configuration;
  59        private readonly string path;
  60        private static readonly ILog log = LogManager.GetCurrentClassLogger();
  61        private static readonly ILog startupLog = LogManager.GetLogger(typeof(IndexStorage).FullName + ".Startup");
  62        private readonly Analyzer dummyAnalyzer = new SimpleAnalyzer();
  63        private DateTime latestPersistedQueryTime;
  64        private readonly FileStream crashMarker;
  65        private ConcurrentDictionary<int, Index> indexes =
  66            new ConcurrentDictionary<int, Index>();
  67
  68        public class RegisterLowMemoryHandler : ILowMemoryHandler
  69        {
  70            static RegisterLowMemoryHandler _instance;
  71
  72            public static void Setup()
  73            {
  74                if (_instance != null)
  75                    return;
  76                lock (typeof(RegisterLowMemoryHandler))
  77                {
  78                    if (_instance != null)
  79                        return;
  80                    _instance = new RegisterLowMemoryHandler();
  81                    MemoryStatistics.RegisterLowMemoryHandler(_instance);
  82                }
  83            }
  84
  85            public void HandleLowMemory()
  86            {
  87                FieldCache_Fields.DEFAULT.PurgeAllCaches();
  88
  89            }
  90
  91            public void SoftMemoryRelease()
  92            {
  93            }
  94
  95            public LowMemoryHandlerStatistics GetStats()
  96            {
  97                var cacheEntries = FieldCache_Fields.DEFAULT.GetCacheEntries();
  98                var memorySum = cacheEntries.Sum(x =>
  99                {
 100                    var curEstimator = new RamUsageEstimator(false);
 101                    return curEstimator.EstimateRamUsage(x);
 102                });
 103                return new LowMemoryHandlerStatistics
 104                {
 105                    Name = "LuceneLowMemoryHandler",
 106                    EstimatedUsedMemory = memorySum,
 107                    Metadata = new
 108                    {
 109                        CachedEntriesAmount = cacheEntries.Length
 110                    }
 111                };
 112            }
 113        }
 114
 115        public IndexStorage(IndexDefinitionStorage indexDefinitionStorage, RavenConfiguration configuration, DocumentDatabase documentDatabase)
 116        {
 117            try
 118            {
 119                RegisterLowMemoryHandler.Setup();
 120                this.indexDefinitionStorage = indexDefinitionStorage;
 121                this.configuration = configuration;
 122                this.documentDatabase = documentDatabase;
 123                path = configuration.Core.IndexStoragePath;
 124
 125                if (System.IO.Directory.Exists(path) == false && configuration.Core.RunInMemory == false)
 126                    System.IO.Directory.CreateDirectory(path);
 127
 128                if (configuration.Core.RunInMemory == false)
 129                {
 130                    var crashMarkerPath = Path.Combine(path, "indexing.crash-marker");
 131
 132                    if (File.Exists(crashMarkerPath))
 133                    {
 134                        // the only way this can happen is if we crashed because of a power outage
 135                        // in this case, we consider all open indexes to be corrupt and force them
 136                        // to be reset. This is because to get better perf, we don't flush the files to disk,
 137                        // so in the case of a power outage, we can't be sure that there wasn't still stuff in
 138                        // the OS buffer that wasn't written yet.
 139                        configuration.Indexing.ResetIndexOnUncleanShutdown = true;
 140                    }
 141
 142                    // The delete on close ensures that the only way this file will exists is if there was
 143                    // a power outage while the server was running.
 144                    crashMarker = File.Create(crashMarkerPath, 16, FileOptions.DeleteOnClose);
 145                }
 146
 147                if (log.IsDebugEnabled)
 148                    log.Debug("Start opening indexes. There are {0} indexes that need to be loaded", indexDefinitionStorage.IndexNames.Length);
 149
 150                BackgroundTaskExecuter.Instance.ExecuteAllInterleaved(documentDatabase.WorkContext, indexDefinitionStorage.IndexNames,
 151                    name =>
 152                    {
 153                        var index = OpenIndex(name, onStartup: true, forceFullIndexCheck: false);
 154
 155                        if (index != null)
 156                            indexes.TryAdd(index.IndexId, index);
 157                        if (startupLog.IsDebugEnabled)
 158                            startupLog.Debug("{0}/{1} indexes loaded", indexes.Count, indexDefinitionStorage.IndexNames.Length);
 159                    });
 160                if (log.IsDebugEnabled)
 161                    log.Debug("Index storage initialized. All indexes have been opened.");
 162            }
 163            catch (Exception e)
 164            {
 165                log.WarnException("Could not create index storage", e);
 166                try
 167                {
 168                    Dispose();
 169                }
 170                catch (Exception ex)
 171                {
 172                    log.FatalException("Failed to dispose when already getting an error during ctor", ex);
 173                }
 174                throw;
 175            }
 176        }
 177
 178        private Index OpenIndex(string indexName, bool onStartup, bool forceFullIndexCheck)
 179        {
 180            if (indexName == null)
 181                throw new ArgumentNullException("indexName");
 182            if (startupLog.IsDebugEnabled)
 183                startupLog.Debug("Loading saved index {0}", indexName);
 184
 185            var indexDefinition = indexDefinitionStorage.GetIndexDefinition(indexName);
 186            if (indexDefinition == null)
 187                return null;
 188
 189            Index indexImplementation = null;
 190            bool resetTried = false;
 191            bool recoveryTried = false;
 192            string[] keysToDeleteAfterRecovery = null;
 193            while (true)
 194            {
 195                Directory luceneDirectory = null;
 196                try
 197                {
 198                    luceneDirectory = OpenOrCreateLuceneDirectory(indexDefinition, createIfMissing: resetTried, forceFullExistingIndexCheck: forceFullIndexCheck);
 199                    indexImplementation = CreateIndexImplementation(indexDefinition, luceneDirectory);
 200
 201                    CheckIndexState(luceneDirectory, indexDefinition, indexImplementation, resetTried);
 202
 203                    if (forceFullIndexCheck)
 204                    {
 205                        // the above index check might pass however an index writer creation can still throw an exception
 206                        // so we need to check it here to avoid crashing in runtime
 207                        new IndexWriter(luceneDirectory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Dispose();
 208                    }
 209
 210                    var simpleIndex = indexImplementation as SimpleIndex; // no need to do this on m/r indexes, since we rebuild them from saved data anyway
 211                    if (simpleIndex != null && keysToDeleteAfterRecovery != null)
 212                    {
 213                        // remove keys from index that were deleted after creating commit point
 214                        simpleIndex.RemoveDirectlyFromIndex(keysToDeleteAfterRecovery, GetLastEtagForIndex(simpleIndex));
 215                    }
 216
 217                    LoadExistingSuggestionsExtentions(indexDefinition.Name, indexImplementation);
 218
 219                    documentDatabase.TransactionalStorage.Batch(accessor =>
 220                    {
 221                        IndexStats indexStats = accessor.Indexing.GetIndexStats(indexDefinition.IndexId);
 222                        if (indexStats != null)
 223                            indexImplementation.Priority = indexStats.Priority;
 224
 225                        var read = accessor.Lists.Read("Raven/Indexes/QueryTime", indexName);
 226                        if (read == null)
 227                        {
 228                            if (IsIdleAutoIndex(indexImplementation))
 229                                indexImplementation.MarkQueried(); // prevent index abandoning right after startup
 230
 231                            return;
 232                        }
 233
 234                        var dateTime = read.Data.Value<DateTime>("LastQueryTime");
 235
 236                        if (IsIdleAutoIndex(indexImplementation) && SystemTime.UtcNow - dateTime > configuration.Indexing.TimeToWaitBeforeRunningAbandonedIndexes.AsTimeSpan)
 237                            indexImplementation.MarkQueried(); // prevent index abandoning right after startup
 238                        else
 239                            indexImplementation.MarkQueried(dateTime);
 240
 241                        if (dateTime > latestPersistedQueryTime)
 242                            latestPersistedQueryTime = dateTime;
 243                    });
 244
 245                    break;
 246                }
 247                catch (Exception e)
 248                {
 249                    if (resetTried)
 250                        throw new InvalidOperationException("Could not open / create index" + indexName + ", reset already tried", e);
 251
 252                    if (indexImplementation != null)
 253                        indexImplementation.Dispose();
 254
 255                    if (recoveryTried == false && luceneDirectory != null)
 256                    {
 257                        recoveryTried = true;
 258                        startupLog.WarnException("Could not open index " + indexName + ". Trying to recover index", e);
 259
 260                        keysToDeleteAfterRecovery = TryRecoveringIndex(indexDefinition, luceneDirectory);
 261                    }
 262                    else
 263                    {
 264                        resetTried = true;
 265                        startupLog.WarnException("Could not open index " + indexName + ". Recovery operation failed, forcibly resetting index", e);
 266                        TryResettingIndex(indexName, indexDefinition, onStartup);
 267                    }
 268                }
 269            }
 270
 271            return indexImplementation;
 272        }
 273
 274        private void CheckIndexState(Directory directory, IndexDefinition indexDefinition, Index index, bool resetTried)
 275        {
 276            //if (configuration.ResetIndexOnUncleanShutdown == false)
 277            //	return;
 278
 279            // 1. If commitData is null it means that there were no commits, so just in case we are resetting to Etag.Empty
 280            // 2. If no 'LastEtag' in commitData then we consider it an invalid index
 281            // 3. If 'LastEtag' is present (and valid), then resetting to it (if it is lower than lastStoredEtag)
 282
 283            var commitData = IndexReader.GetCommitUserData(directory);
 284
 285            if (index.IsMapReduce)
 286                CheckMapReduceIndexState(commitData, resetTried);
 287            else
 288                CheckMapIndexState(commitData, indexDefinition, index);
 289        }
 290
 291        private void CheckMapIndexState(IDictionary<string, string> commitData, IndexDefinition indexDefinition, Index index)
 292        {
 293            string value;
 294            Etag lastEtag = null;
 295            if (commitData != null && commitData.TryGetValue("LastEtag", out value))
 296                Etag.TryParse(value, out lastEtag); // etag will be null if parsing will fail
 297
 298            var lastStoredEtag = GetLastEtagForIndex(index) ?? Etag.Empty;
 299            lastEtag = lastEtag ?? Etag.Empty;
 300
 301            if (EtagUtil.IsGreaterThanOrEqual(lastEtag, lastStoredEtag))
 302                return;
 303
 304            log.Info(string.Format("Resetting index '{0} ({1})'. Last stored etag: {2}. Last commit etag: {3}.", indexDefinition.Name, index.indexId, lastStoredEtag, lastEtag));
 305
 306            var now = SystemTime.UtcNow;
 307            ResetLastIndexedEtag(indexDefinition, lastEtag, now);
 308        }
 309
 310        private static void CheckMapReduceIndexState(IDictionary<string, string> commitData, bool resetTried)
 311        {
 312            if (resetTried)
 313                return;
 314
 315            string marker;
 316            long commitMarker;
 317            var valid = commitData != null
 318                && commitData.TryGetValue("Marker", out marker)
 319                && long.TryParse(marker, out commitMarker)
 320                && commitMarker == RavenIndexWriter.CommitMarker;
 321
 322            if (valid == false)
 323                throw new InvalidOperationException("Map-Reduce index corruption detected.");
 324        }
 325
 326        private static bool IsIdleAutoIndex(Index index)
 327        {
 328            return index.PublicName.StartsWith("Auto/") && index.Priority == IndexingPriority.Idle;
 329        }
 330
 331        private void TryResettingIndex(string indexName, IndexDefinition indexDefinition, bool onStartup)
 332        {
 333            try
 334            {
 335                Action reset = () =>
 336                {
 337                    try
 338                    {
 339                        documentDatabase.Indexes.DeleteIndex(indexDefinition, removeIndexReplaceDocument: false);
 340                        documentDatabase.Indexes.PutNewIndexIntoStorage(indexName, indexDefinition);
 341
 342                        var indexReplaceDocumentKey = Constants.IndexReplacePrefix + indexName;
 343                        var indexReplaceDocument = documentDatabase.Documents.Get(indexReplaceDocumentKey);
 344                        if (indexReplaceDocument == null)
 345                            return;
 346
 347                        documentDatabase.Documents.Put(indexReplaceDocumentKey, null, indexReplaceDocument.DataAsJson, indexReplaceDocument.Metadata, null);
 348                    }
 349                    catch (Exception e)
 350                    {
 351                        throw new InvalidOperationException("Could not finalize reseting of index: " + indexName, e);
 352                    }
 353                };
 354
 355                if (onStartup)
 356                {
 357                    // we have to defer the work here until the database is actually ready for work
 358                    documentDatabase.OnIndexingWiringComplete += reset;
 359                }
 360                else
 361                {
 362                    reset();
 363                }
 364
 365                var indexFullPath = Path.Combine(path, indexDefinition.IndexId.ToString(CultureInfo.InvariantCulture));
 366                IOExtensions.DeleteDirectory(indexFullPath);
 367
 368                var suggestionsForIndex = Path.Combine(configuration.Core.IndexStoragePath, "Raven-Suggestions", indexName);
 369                IOExtensions.DeleteDirectory(suggestionsForIndex);
 370
 371            }
 372            catch (Exception exception)
 373            {
 374                throw new InvalidOperationException("Could not reset index " + indexName, exception);
 375            }
 376        }
 377
 378        private string[] TryRecoveringIndex(IndexDefinition indexDefinition,
 379                                            Directory luceneDirectory)
 380        {
 381            string[] keysToDeleteAfterRecovery = null;
 382            if (indexDefinition.IsMapReduce == false)
 383            {
 384                IndexCommitPoint commitUsedToRestore;
 385
 386                if (TryReusePreviousCommitPointsToRecoverIndex(luceneDirectory,
 387                                                               indexDefinition, path,
 388                                                               out commitUsedToRestore,
 389                                                               out keysToDeleteAfterRecovery))
 390                {
 391                    ResetLastIndexedEtag(indexDefinition, commitUsedToRestore.HighestCommitedETag, commitUsedToRestore.TimeStamp);
 392                }
 393            }
 394            else
 395            {
 396                RegenerateMapReduceIndex(luceneDirectory, indexDefinition);
 397            }
 398            return keysToDeleteAfterRecovery;
 399        }
 400
 401        private void LoadExistingSuggestionsExtentions(string indexName, Index indexImplementation)
 402        {
 403            var suggestionsForIndex = Path.Combine(configuration.Core.IndexStoragePath, "Raven-Suggestions", indexName);
 404            if (!System.IO.Directory.Exists(suggestionsForIndex))
 405                return;
 406
 407            try
 408            {
 409                var directories = System.IO.Directory.GetDirectories(suggestionsForIndex);
 410                if (directories.Any(dir => dir.Contains("-")))
 411                {
 412                    // Legacy handling:
 413                    // Previously we had separate folder with suggestions for each triple: (field, distanceType, accuracy)
 414                    // Now we have field only.
 415                    // Legacy naming convention was: field-{distanceType}-{accuracy}
 416                    // since when we have - (dash) in SOME folder name it seems to be legacy
 417                    HandleLegacySuggestions(directories);
 418
 419                    // Refresh directories list as handling legacy might rename or delete some of them.					
 420                    directories = System.IO.Directory.GetDirectories(suggestionsForIndex);
 421                }
 422
 423                foreach (var directory in directories)
 424                {
 425                    IndexSearcher searcher;
 426                    using (indexImplementation.GetSearcher(out searcher))
 427                    {
 428                        var key = Path.GetFileName(directory);
 429                        var field = MonoHttpUtility.UrlDecode(key);
 430                        var extension = new SuggestionQueryIndexExtension(
 431                            indexImplementation,
 432                            documentDatabase.WorkContext,
 433                            Path.Combine(configuration.Core.IndexStoragePath, "Raven-Suggestions", indexName, key),
 434                            searcher.IndexReader.Directory() is RAMDirectory,
 435                            field);
 436                        indexImplementation.SetExtension(key, extension);
 437                    }
 438                }
 439            }
 440            catch (Exception e)
 441            {
 442                log.WarnException("Could not open suggestions for index " + indexName + ", resetting the index", e);
 443                try
 444                {
 445                    IOExtensions.DeleteDirectory(suggestionsForIndex);
 446                }
 447                catch (Exception)
 448                {
 449                    // ignore the failure
 450                }
 451                throw;
 452            }
 453        }
 454
 455        internal static void HandleLegacySuggestions(string[] directories)
 456        {
 457            var alreadySeenFields = new HashSet<string>();
 458            foreach (var directory in directories)
 459            {
 460                var key = Path.GetFileName(directory);
 461                var parentDir = System.IO.Directory.GetParent(directory).FullName;
 462
 463                if (key.Contains("-"))
 464                {
 465                    var tokens = key.Split('-');
 466                    var field = tokens[0];
 467                    if (alreadySeenFields.Contains(field))
 468                    {
 469                        log.Info("Removing legacy suggestions: {0}", directory);
 470                        IOExtensions.DeleteDirectory(directory);
 471                    }
 472                    else
 473                    {
 474                        alreadySeenFields.Add(field);
 475                        var newLocation = Path.Combine(parentDir, field);
 476
 477                        log.Info("Moving suggestions from: {0} to {1}", directory, newLocation);
 478                        System.IO.Directory.Move(directory, newLocation);
 479                    }
 480                }
 481                else
 482                {
 483                    alreadySeenFields.Add(key);
 484                }
 485            }
 486        }
 487
 488        protected Lucene.Net.Store.Directory OpenOrCreateLuceneDirectory(IndexDefinition indexDefinition, bool createIfMissing = true, bool forceFullExistingIndexCheck = false)
 489        {
 490            Lucene.Net.Store.Directory directory;
 491            if (configuration.Core.RunInMemory ||
 492                (indexDefinition.IsMapReduce == false &&  // there is no point in creating map/reduce indexes in memory, we write the intermediate results to disk anyway
 493                 indexDefinitionStorage.IsNewThisSession(indexDefinition) &&
 494                 indexDefinition.DisableInMemoryIndexing == false &&
 495                 configuration.Indexing.DisableInMemoryIndexing == false &&
 496                 forceFullExistingIndexCheck == false))
 497            {
 498                directory = new RAMDirectory();
 499                new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Dispose(); // creating index structure
 500            }
 501            else
 502            {
 503                var indexDirectory = indexDefinition.IndexId.ToString();
 504                var indexFullPath = Path.Combine(path, indexDirectory);
 505                directory = new LuceneCodecDirectory(indexFullPath, documentDatabase.IndexCodecs.OfType<AbstractIndexCodec>());
 506
 507                if (!IndexReader.IndexExists(directory))
 508                {
 509                    if (createIfMissing == false)
 510                        throw new InvalidOperationException(string.Format("Index directory '{0}' does not exists for '{1}' index.", indexFullPath, indexDefinition.Name));
 511
 512                    WriteIndexVersion(directory, indexDefinition);
 513
 514                    //creating index structure if we need to
 515                    new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Dispose();
 516                }
 517                else
 518                {
 519                    EnsureIndexVersionMatches(directory, indexDefinition);
 520
 521                    if (forceFullExistingIndexCheck == false)
 522                    {
 523                        if (directory.FileExists("write.lock")) // force lock release, because it was still open when we shut down
 524                        {
 525                            IndexWriter.Unlock(directory);
 526                            // for some reason, just calling unlock doesn't remove this file
 527                            directory.DeleteFile("write.lock");
 528                        }
 529                        if (directory.FileExists("writing-to-index.lock")) // we had an unclean shutdown
 530                        {
 531                            if (configuration.Indexing.ResetIndexOnUncleanShutdown)
 532                                throw new InvalidOperationException(string.Format("Rude shutdown detected on '{0}' index in '{1}' directory.", indexDefinition.Name, indexFullPath));
 533
 534                            CheckIndexAndTryToFix(directory, indexDefinition);
 535                            directory.DeleteFile("writing-to-index.lock");
 536                        }
 537                    }
 538                    else
 539                    {
 540                        IndexWriter.Unlock(directory);
 541
 542                        if (directory.FileExists("write.lock"))
 543                            directory.DeleteFile("write.lock");
 544
 545                        CheckIndexAndTryToFix(directory, indexDefinition);
 546
 547                        if (directory.FileExists("writing-to-index.lock"))
 548                            directory.DeleteFile("writing-to-index.lock");
 549                    }
 550                }
 551            }
 552
 553            return directory;
 554
 555        }
 556
 557        private void RegenerateMapReduceIndex(Directory directory, IndexDefinition indexDefinition)
 558        {
 559            // remove old index data
 560            var dirOnDisk = Path.Combine(path, indexDefinition.IndexId.ToString());
 561            IOExtensions.DeleteDirectory(dirOnDisk);
 562
 563            // initialize by new index
 564            System.IO.Directory.CreateDirectory(dirOnDisk);
 565            WriteIndexVersion(directory, indexDefinition);
 566            new IndexWriter(directory, dummyAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED).Dispose();
 567
 568            var start = 0;
 569            const int take = 100;
 570
 571            documentDatabase.TransactionalStorage.Batch(actions =>
 572            {
 573                IList<ReduceTypePerKey> reduceKeysAndTypes;
 574
 575                do
 576                {
 577                    reduceKeysAndTypes = actions.MapReduce.GetReduceKeysAndTypes(indexDefinition.IndexId, start, take).ToList();
 578                    start += take;
 579
 580                    var keysToScheduleOnLevel2 =
 581                        reduceKeysAndTypes.Where(x => x.OperationTypeToPerform == ReduceType.MultiStep).ToList();
 582                    var keysToScheduleOnLevel0 =
 583                        reduceKeysAndTypes.Where(x => x.OperationTypeToPerform == ReduceType.SingleStep).ToList();
 584
 585                    var itemsToScheduleOnLevel2 = keysToScheduleOnLevel2.Select(x => new ReduceKeyAndBucket(0, x.ReduceKey)).ToList();
 586                    var itemsToScheduleOnLevel0 = new List<ReduceKeyAndBucket>();
 587
 588                    foreach (var reduceKey in keysToScheduleOnLevel0.Select(x => x.ReduceKey))
 589                    {
 590                        var mappedBuckets = actions.MapReduce.GetMappedBuckets(indexDefinition.IndexId, reduceKey, CancellationToken.None).Distinct();
 591
 592                        itemsToScheduleOnLevel0.AddRange(mappedBuckets.Select(x => new ReduceKeyAndBucket(x, reduceKey)));
 593                    }
 594
 595                    foreach (var itemToReduce in itemsToScheduleOnLevel2)
 596                    {
 597                        actions.MapReduce.ScheduleReductions(indexDefinition.IndexId, 2, itemToReduce);
 598                        actions.General.MaybePulseTransaction();
 599                    }
 600
 601                    foreach (var itemToReduce in itemsToScheduleOnLevel0)
 602                    {
 603                        actions.MapReduce.ScheduleReductions(indexDefinition.IndexId, 0, itemToReduce);
 604                        actions.General.MaybePulseTransaction();
 605                    }
 606
 607                } while (reduceKeysAndTypes.Count > 0);
 608            });
 609        }
 610
 611        private void ResetLastIndexedEtag(IndexDefinition indexDefinition, Etag lastIndexedEtag, DateTime timestamp)
 612        {
 613            documentDatabase.TransactionalStorage.Batch(
 614                accessor =>
 615                accessor.Indexing.UpdateLastIndexed(indexDefinition.IndexId, lastIndexedEtag, timestamp));
 616        }
 617
 618        internal Etag GetLastEtagForIndex(Index index)
 619        {
 620            if (index.IsMapReduce)
 621                return null;
 622
 623            IndexStats stats = null;
 624            documentDatabase.TransactionalStorage.Batch(accessor => stats = accessor.Indexing.GetIndexStats(index.IndexId));
 625
 626            return stats != null ? stats.LastIndexedEtag : Etag.Empty;
 627        }
 628
 629        public static string IndexVersionFileName(IndexDefinition indexDefinition)
 630        {
 631            if (indexDefinition.IsMapReduce)
 632                return "mapReduce.version";
 633            return "index.version";
 634        }
 635
 636        public static void WriteIndexVersion(Directory directory, IndexDefinition indexDefinition)
 637        {
 638            var version = IndexVersion;
 639            if (indexDefinition.IsMapReduce)
 640            {
 641                version = MapReduceIndexVersion;
 642            }
 643            using (var indexOutput = directory.CreateOutput(IndexVersionFileName(indexDefinition)))
 644            {
 645                indexOutput.WriteString(version);
 646                indexOutput.Flush();
 647            }
 648        }
 649
 650        private static void EnsureIndexVersionMatches(Directory directory, IndexDefinition indexDefinition)
 651        {
 652            var versionToCheck = IndexVersion;
 653            if (indexDefinition.IsMapReduce)
 654            {
 655                versionToCheck = MapReduceIndexVersion;
 656            }
 657            var indexVersion = IndexVersionFileName(indexDefinition);
 658            if (directory.FileExists(indexVersion) == false)
 659            {
 660                throw new InvalidOperationException("Could not find " + indexVersion + " " + indexDefinition.IndexId + ", resetting index");
 661            }
 662            using (var indexInput = directory.OpenInput(indexVersion))
 663            {
 664                var versionFromDisk = indexInput.ReadString();
 665                if (versionFromDisk != versionToCheck)
 666                    throw new InvalidOperationException("Index " + indexDefinition.IndexId + " is of version " + versionFromDisk +
 667                                                        " which is not compatible with " + versionToCheck + ", resetting index");
 668            }
 669        }
 670
 671        private static void CheckIndexAndTryToFix(Directory directory, IndexDefinition indexDefinition)
 672        {
 673            startupLog.Warn("Unclean shutdown detected on {0}, checking the index for errors. This may take a while.", indexDefinition.Name);
 674
 675            var memoryStream = new MemoryStream();
 676            var stringWriter = new StreamWriter(memoryStream);
 677            var checkIndex = new CheckIndex(directory);
 678
 679            if (startupLog.IsWarnEnabled)
 680                checkIndex.SetInfoStream(stringWriter);
 681
 682            var sp = Stopwatch.StartNew();
 683            var status = checkIndex.CheckIndex_Renamed_Method();
 684            sp.Stop();
 685            if (startupLog.IsWarnEnabled)
 686            {
 687                startupLog.Warn("Checking index {0} took: {1}, clean: {2}", indexDefinition.Name, sp.Elapsed, status.clean);
 688                memoryStream.Position = 0;
 689
 690                log.Warn(new StreamReader(memoryStream).ReadToEnd());
 691            }
 692
 693            if (status.clean)
 694                return;
 695
 696            startupLog.Warn("Attempting to fix index: {0}", indexDefinition.Name);
 697            sp.Restart();
 698            checkIndex.FixIndex(status);
 699            startupLog.Warn("Fixed index {0} in {1}", indexDefinition.Name, sp.Elapsed);
 700        }
 701
 702        public void StoreCommitPoint(string indexName, IndexCommitPoint indexCommit)
 703        {
 704            if (indexCommit.SegmentsInfo == null || indexCommit.SegmentsInfo.IsIndexCorrupted)
 705                return;
 706
 707            var directoryName = indexCommit.SegmentsInfo.Generation.ToString("0000000000000000000", CultureInfo.InvariantCulture);
 708            var commitPointDirectory = new IndexCommitPointDirectory(path, indexName, directoryName);
 709
 710            if (System.IO.Directory.Exists(commitPointDirectory.AllCommitPointsFullPath) == false)
 711            {
 712                System.IO.Directory.CreateDirectory(commitPointDirectory.AllCommitPointsFullPath);
 713            }
 714
 715            System.IO.Directory.CreateDirectory(commitPointDirectory.FullPath);
 716
 717            using (var commitPointFile = File.Create(commitPointDirectory.FileFullPath))
 718            using (var sw = new StreamWriter(commitPointFile))
 719            {
 720                var jsonSerializer = JsonExtensions.CreateDefaultJsonSerializer();
 721                var textWriter = new JsonTextWriter(sw);
 722
 723                jsonSerializer.Serialize(textWriter, indexCommit);
 724                sw.Flush();
 725            }
 726
 727            var currentSegmentsFileName = indexCommit.SegmentsInfo.SegmentsFileName;
 728
 729            File.Copy(Path.Combine(commitPointDirectory.IndexFullPath, currentSegmentsFileName),
 730                        Path.Combine(commitPointDirectory.FullPath, currentSegmentsFileName),
 731                        overwrite: true);
 732
 733            var storedCommitPoints = System.IO.Directory.GetDirectories(commitPointDirectory.AllCommitPointsFullPath);
 734
 735            if (storedCommitPoints.Length > configuration.Indexing.MaxNumberOfStoredCommitPoints)
 736            {
 737                foreach (var toDelete in storedCommitPoints.Take(storedCommitPoints.Length - configuration.Indexing.MaxNumberOfStoredCommitPoints))
 738                {
 739                    IOExtensions.DeleteDirectory(toDelete);
 740                }
 741            }
 742        }
 743
 744        public void AddDeletedKeysToCommitPoints(IndexDefinition indexDefinition, string[] deletedKeys)
 745        {
 746            var indexFullPath = Path.Combine(path, indexDefinition.IndexId.ToString());
 747
 748            var existingCommitPoints = IndexCommitPointDirectory.ScanAllCommitPointsDirectory(indexFullPath);
 749
 750            foreach (var commitPointDirectory in existingCommitPoints.Select(commitPoint => new IndexCommitPointDirectory(path, indexDefinition.IndexId.ToString(), commitPoint)))
 751            {
 752                using (var stream = File.Open(commitPointDirectory.DeletedKeysFile, FileMode.OpenOrCreate))
 753                {
 754                    stream.Seek(0, SeekOrigin.End);
 755                    using (var writer = new StreamWriter(stream))
 756                    {
 757                        foreach (var deletedKey in deletedKeys)
 758                        {
 759                            writer.WriteLine(deletedKey);
 760                        }
 761                    }
 762                }
 763            }
 764        }
 765
 766        private bool TryReusePreviousCommitPointsToRecoverIndex(Directory directory, IndexDefinition indexDefinition, string indexStoragePath, out IndexCommitPoint indexCommit, out string[] keysToDelete)
 767        {
 768            indexCommit = null;
 769            keysToDelete = null;
 770
 771            if (indexDefinition.IsMapReduce)
 772                return false;
 773
 774            var indexFullPath = Path.Combine(indexStoragePath, indexDefinition.IndexId.ToString());
 775
 776
 777
 778            var allCommitPointsFullPath = IndexCommitPointDirectory.GetAllCommitPointsFullPath(indexFullPath);
 779
 780            if (System.IO.Directory.Exists(allCommitPointsFullPath) == false)
 781                return false;
 782
 783            var filesInIndexDirectory = System.IO.Directory.GetFiles(indexFullPath).Select(Path.GetFileName);
 784
 785            var existingCommitPoints =
 786                IndexCommitPointDirectory.ScanAllCommitPointsDirectory(indexFullPath);
 787
 788            Array.Reverse(existingCommitPoints); // start from the highest generation
 789
 790            foreach (var commitPointDirectoryName in existingCommitPoints)
 791            {
 792                try
 793                {
 794                    var commitPointDirectory = new IndexCommitPointDirectory(indexStoragePath, indexDefinition.IndexId.ToString(),
 795                                                                                commitPointDirectoryName);
 796
 797                    if (TryGetCommitPoint(commitPointDirectory, out indexCommit) == false)
 798                    {
 799                        IOExtensions.DeleteDirectory(commitPointDirectory.FullPath);
 800                        continue; // checksum is invalid, try another commit point
 801                    }
 802
 803                    var missingFile =
 804                        indexCommit.SegmentsInfo.ReferencedFiles.Any(
 805                            referencedFile => filesInIndexDirectory.Contains(referencedFile) == false);
 806
 807                    if (missingFile)
 808                    {
 809                        IOExtensions.DeleteDirectory(commitPointDirectory.FullPath);
 810                        continue; // there are some missing files, try another commit point
 811                    }
 812
 813                    var storedSegmentsFile = indexCommit.SegmentsInfo.SegmentsFileName;
 814
 815                    // here there should be only one segments_N file, however remove all if there is more
 816                    foreach (var currentSegmentsFile in System.IO.Directory.GetFiles(commitPointDirectory.IndexFullPath, "segments_*"))
 817                    {
 818                        File.Delete(currentSegmentsFile);
 819                    }
 820
 821                    // copy old segments_N file
 822                    File.Copy(Path.Combine(commitPointDirectory.FullPath, storedSegmentsFile),
 823                              Path.Combine(commitPointDirectory.IndexFullPath, storedSegmentsFile), true);
 824
 825                    try
 826                    {
 827                        // update segments.gen file
 828                        using (var genOutput = directory.CreateOutput(IndexFileNames.SEGMENTS_GEN))
 829                        {
 830                            genOutput.WriteInt(SegmentInfos.FORMAT_LOCKLESS);
 831                            genOutput.WriteLong(indexCommit.SegmentsInfo.Generation);
 832                            genOutput.WriteLong(indexCommit.SegmentsInfo.Generation);
 833                        }
 834                    }
 835                    catch (Exception)
 836                    {
 837                        // here we can ignore, segments.gen is used only as fallback
 838                    }
 839
 840                    if (File.Exists(commitPointDirectory.DeletedKeysFile))
 841                        keysToDelete = File.ReadLines(commitPointDirectory.DeletedKeysFile).ToArray();
 842
 843                    return true;
 844                }
 845                catch (Exception ex)
 846                {
 847                    startupLog.WarnException("Could not recover an index named '" + indexDefinition.IndexId +
 848                                       "'from segments of the following generation " + commitPointDirectoryName, ex);
 849                }
 850            }
 851
 852            return false;
 853        }
 854
 855        public static IndexSegmentsInfo GetCurrentSegmentsInfo(string indexName, Directory directory)
 856        {
 857            var segmentInfos = new SegmentInfos();
 858            var result = new IndexSegmentsInfo();
 859
 860            try
 861            {
 862                segmentInfos.Read(directory);
 863
 864                result.Generation = segmentInfos.Generation;
 865                result.SegmentsFileName = segmentInfos.GetCurrentSegmentFileName();
 866                result.ReferencedFiles = segmentInfos.Files(directory, false);
 867            }
 868            catch (CorruptIndexException ex)
 869            {
 870                log.WarnException(string.Format("Could not read segment information for an index '{0}'", indexName), ex);
 871
 872                result.IsIndexCorrupted = true;
 873            }
 874
 875            return result;
 876        }
 877
 878        public static bool TryGetCommitPoint(IndexCommitPointDirectory commitPointDirectory, out IndexCommitPoint indexCommit)
 879        {
 880            using (var commitPointFile = File.OpenRead(commitPointDirectory.FileFullPath))
 881            {
 882                try
 883                {
 884                    var textReader = new JsonTextReader(new StreamReader(commitPointFile));
 885                    var jsonCommitPoint = RavenJObject.Load(textReader);
 886                    var jsonEtag = jsonCommitPoint.Value<RavenJToken>("HighestCommitedETag");
 887
 888                    Etag recoveredEtag = null;
 889                    if (jsonEtag.Type == JTokenType.Object) // backward compatibility - HighestCommitedETag is written as {"Restarts":123,"Changes":1}
 890                    {
 891                        jsonCommitPoint.Remove("HighestCommitedETag");
 892                        recoveredEtag = new Etag(UuidType.Documents, jsonEtag.Value<long>("Restarts"), jsonEtag.Value<long>("Changes"));
 893                    }
 894
 895                    indexCommit = jsonCommitPoint.JsonDeserialization<IndexCommitPoint>();
 896
 897                    if (indexCommit == null)
 898                        return false;
 899
 900                    if (recoveredEtag != null)
 901                        indexCommit.HighestCommitedETag = recoveredEtag;
 902
 903                    if (indexCommit.HighestCommitedETag == null || indexCommit.HighestCommitedETag.CompareTo(Etag.Empty) == 0)
 904                        return false;
 905
 906                    return true;
 907                }
 908                catch (Exception e)
 909                {
 910                    log.Warn("Could not get commit point from the following location {0}. Exception {1}", commitPointDirectory.FileFullPath, e);
 911
 912                    indexCommit = null;
 913                    return false;
 914                }
 915            }
 916        }
 917
 918        internal Directory MakeRAMDirectoryPhysical(RAMDirectory ramDir, IndexDefinition indexDefinition)
 919        {
 920            var newDir = new LuceneCodecDirectory(Path.Combine(path, indexDefinition.IndexId.ToString()), documentDatabase.IndexCodecs.OfType<AbstractIndexCodec>());
 921            Directory.Copy(ramDir, newDir, false);
 922            return newDir;
 923        }
 924
 925        private Index CreateIndexImplementation(IndexDefinition indexDefinition, Directory directory)
 926        {
 927            var viewGenerator = indexDefinitionStorage.GetViewGenerator(indexDefinition.IndexId);
 928            var indexImplementation = indexDefinition.IsMapReduce
 929                                        ? (Index)new MapReduceIndex(directory, indexDefinition.IndexId, indexDefinition, viewGenerator, documentDatabase.WorkContext)
 930                                        : new SimpleIndex(directory, indexDefinition.IndexId, indexDefinition, viewGenerator, documentDatabase.WorkContext);
 931
 932            configuration.Container.SatisfyImportsOnce(indexImplementation);
 933
 934            return indexImplementation;
 935        }
 936
 937        public int[] Indexes
 938        {
 939            get { return indexes.Keys.ToArray(); }
 940        }
 941
 942        public string[] IndexNames
 943        {
 944            get { return indexes.Values.Select(x => x.PublicName).ToArray(); }
 945        }
 946
 947        public bool HasIndex(string index)
 948        {
 949            if (index == null)
 950                return false;
 951            return indexes.Any(x => String.Compare(index, x.Value.PublicName, StringComparison.OrdinalIgnoreCase) == 0);
 952        }
 953
 954        public void Dispose()
 955        {
 956            var exceptionAggregator = new ExceptionAggregator(log, "Could not properly close index storage");
 957
 958            exceptionAggregator.Execute(FlushMapIndexes);
 959            exceptionAggregator.Execute(FlushReduceIndexes);
 960
 961            exceptionAggregator.Execute(() => Parallel.ForEach(indexes.Values, index => exceptionAggregator.Execute(index.Dispose)));
 962
 963            exceptionAggregator.Execute(() => dummyAnalyzer.Close());
 964
 965            exceptionAggregator.Execute(() =>
 966            {
 967                if (crashMarker != null)
 968                    crashMarker.Dispose();
 969            });
 970
 971            exceptionAggregator.ThrowIfNeeded();
 972        }
 973
 974        public void DeleteIndex(string name)
 975        {
 976            var value = TryIndexByName(name);
 977            if (value == null)
 978                return;
 979            DeleteIndex(value.indexId);
 980        }
 981
 982        public void DeleteIndex(int id)
 983        {
 984            var value = GetIndexInstance(id);
 985            if (value == null)
 986            {
 987                if (log.IsDebugEnabled)
 988                    log.Debug("Ignoring delete for non existing index {0}", id);
 989                return;
 990            }
 991            documentDatabase.TransactionalStorage.Batch(accessor =>
 992              accessor.Lists.Remove("Raven/Indexes/QueryTime", value.PublicName));
 993            if (log.IsDebugEnabled)
 994                log.Debug("Deleting index {0}", value.PublicName);
 995            value.Dispose();
 996            Index ignored;
 997
 998            var dirOnDisk = Path.Combine(path, id.ToString());
 999            if (!indexes.TryRemove(id, out ignored) || !System.IO.Directory.Exists(dirOnDisk))
1000                return;
1001
1002            UpdateIndexMappingFile();
1003        }
1004
1005        public void DeleteIndexData(int id)
1006        {
1007            var dirOnDisk = Path.Combine(path, id.ToString(CultureInfo.InvariantCulture));
1008            IOExtensions.DeleteDirectory(dirOnDisk);
1009        }
1010
1011        public Index ReopenCorruptedIndex(Index index)
1012        {
1013            if (index.Priority != IndexingPriority.Error)
1014                throw new InvalidOperationException(string.Format("Index {0} isn't errored", index.PublicName));
1015
1016            index.Dispose();
1017
1018            var reopened = OpenIndex(index.PublicName, onStartup: false, forceFullIndexCheck: true);
1019
1020            if (reopened == null)
1021                throw new InvalidOperationException("Reopened index cannot be null instance. Index name:" + index.PublicName);
1022
1023            return indexes.AddOrUpdate(reopened.IndexId, n => reopened, (s, existigIndex) => reopened);
1024        }
1025
1026        public void CreateIndexImplementation(IndexDefinition indexDefinition)
1027        {
1028            if (log.IsDebugEnabled)
1029                log.Debug("Creating index {0} with id {1}", indexDefinition.IndexId, indexDefinition.Name);
1030
1031            IndexDefinitionStorage.ResolveAnalyzers(indexDefinition);
1032
1033            if (TryIndexByName(indexDefinition.Name) != null)
1034            {
1035                throw new InvalidOperationException("Index " + indexDefinition.Name + " already exists");
1036            }
1037
1038            var addedIndex = indexes.AddOrUpdate(indexDefinition.IndexId, n =>
1039        {
1040            var directory = OpenOrCreateLuceneDirectory(indexDefinition);
1041            return CreateIndexImplementation(indexDefinition, directory);
1042        }, (s, index) => index);
1043
1044            //prevent corrupted index when creating a map-reduce index
1045            //need to do this for every map reduce index, even when indexing is enabled,
1046            if (addedIndex.IsMapReduce)
1047            {
1048                addedIndex.EnsureIndexWriter();
1049                addedIndex.Flush(Etag.Empty);
1050            }
1051
1052            UpdateIndexMappingFile();
1053        }
1054
1055        public Query GetDocumentQuery(string index, IndexQuery query, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers)
1056        {
1057            var value = TryIndexByName(index);
1058            if (value == null)
1059            {
1060                if (log.IsDebugEnabled)
1061                    log.Debug("Query on non existing index {0}", index);
1062                throw new InvalidOperationException("Index '" + index + "' does not exists");
1063            }
1064            var fieldsToFetch = new FieldsToFetch(new string[0], false, null);
1065            return new Index.IndexQueryOperation(value, query, _ => false, fieldsToFetch, indexQueryTriggers).GetDocumentQuery();
1066        }
1067
1068        private Index TryIndexByName(string name)
1069        {
1070            return indexes.Where(index => String.Compare(index.Value.PublicName, name, StringComparison.OrdinalIgnoreCase) == 0)
1071            .Select(x => x.Value)
1072            .FirstOrDefault();
1073        }
1074
1075        public IEnumerable<IndexQueryResult> Query(string index,
1076            IndexQuery query,
1077            Func<IndexQueryResult, bool> shouldIncludeInResults,
1078            FieldsToFetch fieldsToFetch,
1079            OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers,
1080            CancellationToken token,
1081            Action<double> parseTiming = null
1082            )
1083        {
1084            Index value = TryIndexByName(index);
1085            if (value == null)
1086            {
1087                if (log.IsDebugEnabled)
1088                    log.Debug("Query on non existing index '{0}'", index);
1089                throw new InvalidOperationException("Index '" + index + "' does not exists");
1090            }
1091
1092            if ((value.Priority.HasFlag(IndexingPriority.Idle) || value.Priority.HasFlag(IndexingPriority.Abandoned)) &&
1093                value.Priority.HasFlag(IndexingPriority.Forced) == false)
1094            {
1095                documentDatabase.TransactionalStorage.Batch(accessor =>
1096                {
1097                    value.Priority = IndexingPriority.Normal;
1098                    try
1099                    {
1100                        accessor.Indexing.SetIndexPriority(value.indexId, IndexingPriority.Normal);
1101                    }
1102                    catch (Exception e)
1103                    {
1104                        if (accessor.IsWriteConflict(e) == false)
1105                            throw;
1106
1107                        // we explciitly ignore write conflicts here, it is okay if we got set twice (two concurrent queries, or setting while indexing).
1108                    }
1109                    documentDatabase.WorkContext.ShouldNotifyAboutWork(() => "Idle index q…

Large files are truncated, but you can click here to view the full file