PageRenderTime 51ms CodeModel.GetById 3ms app.highlight 37ms RepoModel.GetById 1ms app.codeStats 1ms

/ToMigrate/Raven.Database/Indexing/Index.cs

Relevant Search: With Applications for Solr and Elasticsearch

'Chapter 4. Taming tokens'. If you want to know how to extract ideas rather than words this book is for you. Learn concepts of precision and recall, making trade-offs between them and controlling the specificity of matches. Amazon Affiliate Link
http://github.com/ayende/ravendb
C# | 2107 lines | 1791 code | 260 blank | 56 comment | 292 complexity | ea98209eeedfce2c87d8653c618d85cd MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1//-----------------------------------------------------------------------
   2// <copyright file="Index.cs" company="Hibernating Rhinos LTD">
   3//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
   4// </copyright>
   5//-----------------------------------------------------------------------
   6using System;
   7using System.Collections;
   8using System.Collections.Concurrent;
   9using System.Collections.Generic;
  10using System.Collections.Specialized;
  11using System.ComponentModel.Composition;
  12using System.Diagnostics;
  13using System.IO;
  14using System.Linq;
  15using System.Text;
  16using System.Threading;
  17using System.Threading.Tasks;
  18using System.Web.UI;
  19using Lucene.Net.Analysis;
  20using Lucene.Net.Analysis.Standard;
  21using Lucene.Net.Documents;
  22using Lucene.Net.Index;
  23using Lucene.Net.Search;
  24using Lucene.Net.Search.Vectorhighlight;
  25using Lucene.Net.Store;
  26using Lucene.Net.Util;
  27using Raven.Abstractions;
  28using Raven.Abstractions.Data;
  29using Raven.Abstractions.Exceptions;
  30using Raven.Abstractions.Extensions;
  31using Raven.Abstractions.Indexing;
  32using Raven.Abstractions.Json.Linq;
  33using Raven.Abstractions.Linq;
  34using Raven.Abstractions.Logging;
  35using Raven.Abstractions.MEF;
  36using Raven.Database.Config;
  37using Raven.Database.Config.Settings;
  38using Raven.Database.Data;
  39using Raven.Database.Extensions;
  40using Raven.Database.Indexing.Analyzers;
  41using Raven.Database.Linq;
  42using Raven.Database.Plugins;
  43using Raven.Database.Storage;
  44using Raven.Database.Tasks;
  45using Raven.Database.Util;
  46using Raven.Json.Linq;
  47using Constants = Raven.Abstractions.Data.Constants;
  48using Directory = Lucene.Net.Store.Directory;
  49using Document = Lucene.Net.Documents.Document;
  50using Field = Lucene.Net.Documents.Field;
  51using Version = Lucene.Net.Util.Version;
  52
  53namespace Raven.Database.Indexing
  54{
  55    /// <summary>
  56    /// 	This is a thread safe, single instance for a particular index.
  57    /// </summary>
  58    public abstract class Index : IDisposable, ILowMemoryHandler
  59    {
  60        protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  61        protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");
  62
  63        private const long WriteErrorsLimit = 10;
  64
  65        private readonly List<Document> currentlyIndexDocuments = new List<Document>();
  66        protected Directory directory;
  67        protected readonly IndexDefinition indexDefinition;
  68        private volatile string waitReason;
  69        private readonly Size flushSize;
  70        private long writeErrors;
  71        // Users sometimes configure index outputs without realizing that we need to count on that for memory 
  72        // management. That can result in very small batch sizes, so we want to make sure that we don't trust
  73        // the user configuration, and use what is actually going on
  74        private int maxActualIndexOutput = 1;
  75
  76        public IndexingPriority Priority { get; set; }
  77        /// <summary>
  78        /// Note, this might be written to by multiple threads at the same time
  79        /// We don't actually care for exact timing, it is more about general feeling
  80        /// </summary>
  81        private DateTime? lastQueryTime;
  82
  83        private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
  84            new ConcurrentDictionary<string, IIndexExtension>();
  85
  86        internal readonly int indexId;
  87
  88        public int IndexId
  89        {
  90            get { return indexId; }
  91        }
  92
  93        private readonly AbstractViewGenerator viewGenerator;
  94        protected readonly WorkContext context;
  95
  96        private readonly object writeLock = new object();
  97        private volatile bool disposed;
  98        private RavenIndexWriter indexWriter;
  99        private SnapshotDeletionPolicy snapshotter;
 100        private readonly IndexSearcherHolder currentIndexSearcherHolder;
 101
 102        private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing = new ConcurrentDictionary<string, IndexingPerformanceStats>();
 103        private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats = new ConcurrentQueue<IndexingPerformanceStats>();
 104        private readonly static StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);
 105        private bool forceWriteToDisk;
 106
 107        [CLSCompliant(false)]
 108        protected Index(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
 109        {
 110            currentIndexSearcherHolder = new IndexSearcherHolder(id, context);
 111            if (directory == null) throw new ArgumentNullException("directory");
 112            if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
 113            if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");
 114
 115            this.indexId = id;
 116            this.indexDefinition = indexDefinition;
 117            this.viewGenerator = viewGenerator;
 118            this.context = context;
 119            if (logIndexing.IsDebugEnabled)
 120                logIndexing.Debug("Creating index for {0}", PublicName);
 121            this.directory = directory;
 122            flushSize = context.Configuration.Indexing.FlushIndexToDiskSize;
 123            _indexCreationTime = SystemTime.UtcNow;
 124            RecreateSearcher();
 125
 126            MemoryStatistics.RegisterLowMemoryHandler(this);
 127        }
 128        public int CurrentNumberOfItemsToIndexInSingleBatch { get; set; }
 129
 130        [ImportMany]
 131        public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }
 132
 133        /// <summary>
 134        /// Whether this is a map reduce index or not
 135        /// </summary>
 136        public abstract bool IsMapReduce { get; }
 137
 138        public DateTime? LastQueryTime
 139        {
 140            get
 141            {
 142                return lastQueryTime;
 143            }
 144        }
 145
 146        public DateTime LastIndexTime { get; set; }
 147
 148        protected DateTime PreviousIndexTime { get; set; }
 149
 150        public string IsOnRam
 151        {
 152            get
 153            {
 154                var ramDirectory = directory as RAMDirectory;
 155                if (ramDirectory == null)
 156                    return "false";
 157                try
 158                {
 159                    return "true (" + SizeHelper.Humane(ramDirectory.SizeInBytes()) + ")";
 160                }
 161                catch (AlreadyClosedException)
 162                {
 163                    return "false";
 164                }
 165            }
 166        }
 167
 168        public string PublicName { get { return indexDefinition.Name; } }
 169
 170        public bool IsTestIndex
 171        {
 172            get { return indexDefinition.IsTestIndex; }
 173        }
 174
 175        public int? MaxIndexOutputsPerDocument
 176        {
 177            get
 178            {
 179                if (maxActualIndexOutput == 1)
 180                    return null;
 181                return maxActualIndexOutput;
 182            }
 183        }
 184
 185        [CLSCompliant(false)]
 186        public volatile bool IsMapIndexingInProgress;
 187        private DateTime _indexCreationTime;
 188
 189        protected IndexingPerformanceStats RecordCurrentBatch(string indexingStep, string operation, int itemsCount)
 190        {
 191            var performanceStats = new IndexingPerformanceStats
 192            {
 193                ItemsCount = itemsCount,
 194                Operation = indexingStep,
 195                Started = SystemTime.UtcNow,
 196                Operations = new BasePerformanceStats[0]
 197            };
 198
 199            var lastStats = indexingPerformanceStats.LastOrDefault(x => x.Operation.Equals(operation, StringComparison.OrdinalIgnoreCase));
 200
 201            if (lastStats != null)
 202                performanceStats.WaitingTimeSinceLastBatchCompleted = performanceStats.Started - lastStats.Completed;
 203
 204            currentlyIndexing.AddOrUpdate(indexingStep, performanceStats, (s, stats) => performanceStats);
 205
 206            return performanceStats;
 207        }
 208
 209        protected void BatchCompleted(string indexingStep, string operation, int inputCount, int outputCount, List<BasePerformanceStats> operationStats)
 210        {
 211            IndexingPerformanceStats stats;
 212            if (currentlyIndexing.TryRemove(indexingStep, out stats))
 213            {
 214                stats.Completed = SystemTime.UtcNow;
 215                stats.Duration = stats.Completed - stats.Started;
 216                stats.Operation = operation;
 217
 218                stats.InputCount = inputCount;
 219                stats.OutputCount = outputCount;
 220                stats.Operations = operationStats.ToArray();
 221
 222                AddIndexingPerformanceStats(stats);
 223            }
 224        }
 225
 226        public void AddIndexingPerformanceStats(IndexingPerformanceStats stats)
 227        {
 228            indexingPerformanceStats.Enqueue(stats);
 229
 230            while (indexingPerformanceStats.Count > 25)
 231                indexingPerformanceStats.TryDequeue(out stats);
 232        }
 233
 234        public void Dispose()
 235        {
 236            try
 237            {
 238                // this is here so we can give good logs in the case of a long shutdown process
 239                if (Monitor.TryEnter(writeLock, 100) == false)
 240                {
 241                    var localReason = waitReason;
 242                    if (localReason != null)
 243                        logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
 244                         localReason, PublicName);
 245
 246                    Monitor.Enter(writeLock);
 247                }
 248
 249                disposed = true;
 250
 251                foreach (var indexExtension in indexExtensions)
 252                {
 253                    indexExtension.Value.Dispose();
 254                }
 255
 256                if (currentIndexSearcherHolder != null)
 257                {
 258                    var item = currentIndexSearcherHolder.SetIndexSearcher(null, PublicName, wait: true);
 259                    if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
 260                    {
 261                        logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
 262                    }
 263                }
 264
 265                try
 266                {
 267                    EnsureIndexWriter();
 268                    ForceWriteToDisk();
 269                    WriteInMemoryIndexToDiskIfNecessary(GetLastEtagFromStats());
 270                }
 271                catch (Exception e)
 272                {
 273                    logIndexing.ErrorException("Error while writing in memory index to disk.", e);
 274                }
 275
 276                if (indexWriter != null) // just in case, WriteInMemoryIndexToDiskIfNecessary recreates writer
 277                {
 278                    var writer = indexWriter;
 279                    indexWriter = null;
 280
 281                    try
 282                    {
 283                        writer.Analyzer.Close();
 284                    }
 285                    catch (Exception e)
 286                    {
 287                        logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
 288                    }
 289
 290                    try
 291                    {
 292                        writer.Dispose();
 293                    }
 294                    catch (Exception e)
 295                    {
 296                        logIndexing.ErrorException("Error when closing the index", e);
 297                    }
 298                }
 299
 300                try
 301                {
 302                    directory.Dispose();
 303                }
 304                catch (Exception e)
 305                {
 306                    logIndexing.ErrorException("Error when closing the directory", e);
 307                }
 308            }
 309            finally
 310            {
 311                Monitor.Exit(writeLock);
 312            }
 313        }
 314
 315        public void EnsureIndexWriter()
 316        {
 317            try
 318            {
 319                if (indexWriter == null)
 320                    CreateIndexWriter();
 321            }
 322            catch (IOException e)
 323            {
 324                string msg = string.Format("Error when trying to create the index writer for index '{0}'.", this.PublicName);
 325                throw new IOException(msg, e);
 326            }
 327        }
 328
 329        public void Flush(Etag highestETag)
 330        {
 331            try
 332            {
 333                lock (writeLock)
 334                {
 335                    if (disposed)
 336                        return;
 337                    if (indexWriter == null)
 338                        return;
 339                    if (context.IndexStorage == null)
 340                        return;
 341
 342                    waitReason = "Flush";
 343                    try
 344                    {
 345                        try
 346                        {
 347                        indexWriter.Commit(highestETag);
 348                        }
 349                        catch (Exception e)
 350                        {
 351                            HandleWriteError(e);
 352                            throw;
 353                        }
 354
 355                        ResetWriteErrors();
 356                    }
 357                    finally
 358                    {
 359                        waitReason = null;
 360                    }
 361                }
 362            }
 363            catch (Exception e)
 364            {
 365                HandleWriteError(e);
 366                throw new IOException("Error during flush for " + PublicName, e);
 367            }
 368        }
 369
 370        public void MergeSegments()
 371        {
 372            lock (writeLock)
 373            {
 374                waitReason = "Merge / Optimize";
 375                try
 376                {
 377                    logIndexing.Info("Starting merge of {0}", PublicName);
 378                    var sp = Stopwatch.StartNew();
 379                    
 380                    EnsureIndexWriter();
 381
 382                    try
 383                    {
 384                    indexWriter.Optimize();
 385                }
 386                catch (Exception e)
 387                {
 388                        HandleWriteError(e);
 389                    throw;
 390                }
 391
 392                    logIndexing.Info("Done merging {0} - took {1}", indexId, sp.Elapsed);
 393
 394                    ResetWriteErrors();
 395                }
 396                finally
 397                {
 398                    waitReason = null;
 399                }
 400            }
 401        }
 402
 403        public abstract IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token);
 404
 405        protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
 406        {
 407            return new IndexQueryResult
 408            {
 409                Score = score.Score,
 410                Key = document.Get(Constants.DocumentIdFieldName),
 411                Projection = (fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields) ? CreateDocumentFromFields(document, fieldsToFetch) : null
 412            };
 413        }
 414
 415        public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
 416        {
 417            var documentFromFields = new RavenJObject();
 418            var fields = fieldsToFetch.Fields;
 419            if (fieldsToFetch.FetchAllStoredFields)
 420                fields = fields.Concat(document.GetFields().Select(x => x.Name));
 421
 422            AddFieldsToDocument(document, new HashSet<string>(fields), documentFromFields);
 423            return documentFromFields;
 424        }
 425
 426        protected static void AddFieldsToDocument(Document document, HashSet<string> fieldNames, RavenJObject documentFromFields)
 427        {
 428            foreach (var fldName in fieldNames)
 429            {
 430                if (fldName.EndsWith("_IsArray") ||
 431                    fldName.EndsWith("_Range") ||
 432                    fldName.EndsWith("_ConvertToJson"))
 433                    continue;
 434
 435                var isArray = fldName + "_IsArray";
 436                foreach (var field in document.GetFields(fldName))
 437                {
 438                    var val = CreateProperty(field, document);
 439                    RavenJToken arrayToken;
 440                    var tryGetValue = documentFromFields.TryGetValue(field.Name, out arrayToken);
 441                    if (tryGetValue || document.GetField(isArray) != null)
 442                    {
 443                        var array = arrayToken as RavenJArray;
 444                        if (array == null)
 445                        {
 446                            documentFromFields[field.Name] = array =
 447                                (tryGetValue ? new RavenJArray { arrayToken } : new RavenJArray());
 448                        }
 449                        array.Add(val);
 450                    }
 451                    else
 452                    {
 453                        documentFromFields[field.Name] = val;
 454                    }
 455                }
 456            }
 457        }
 458
 459        protected void InvokeOnIndexEntryDeletedOnAllBatchers(List<AbstractIndexUpdateTriggerBatcher> batchers, Term term)
 460        {
 461            if (!batchers.Any(batcher => batcher.RequiresDocumentOnIndexEntryDeleted)) return;
 462            // find all documents
 463            var key = term.Text;
 464
 465            IndexSearcher searcher = null;
 466            using (GetSearcher(out searcher))
 467            {
 468                var collector = new GatherAllCollector();
 469                searcher.Search(new TermQuery(term), collector);
 470                var topDocs = collector.ToTopDocs();
 471
 472                foreach (var scoreDoc in topDocs.ScoreDocs)
 473                {
 474                    var document = searcher.Doc(scoreDoc.Doc);
 475                    batchers.ApplyAndIgnoreAllErrors(
 476                        exception =>
 477                        {
 478                            logIndexing.WarnException(
 479                                string.Format(
 480                                    "Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
 481                                    PublicName, key),
 482                                exception);
 483                            context.AddError(indexId, PublicName, key, exception, "OnIndexEntryDeleted Trigger");
 484                        },
 485                        trigger => trigger.OnIndexEntryDeleted(key, document));
 486                }
 487            }
 488        }
 489
 490        private static RavenJToken CreateProperty(Field fld, Document document)
 491        {
 492            if (fld.IsBinary)
 493                return fld.GetBinaryValue();
 494            var stringValue = fld.StringValue;
 495            if (document.GetField(fld.Name + "_ConvertToJson") != null)
 496            {
 497                var val = RavenJToken.Parse(fld.StringValue) as RavenJObject;
 498                return val;
 499            }
 500            if (stringValue == Constants.NullValue)
 501                stringValue = null;
 502            if (stringValue == Constants.EmptyString)
 503                stringValue = string.Empty;
 504            return stringValue;
 505        }
 506
 507        protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action, List<PerformanceStats> writePerformanceStats = null)
 508        {
 509            if (disposed)
 510                throw new ObjectDisposedException("Index " + PublicName + " has been disposed");
 511
 512            Stopwatch extensionExecutionDuration = null;
 513            Stopwatch flushToDiskDuration = null;
 514            Stopwatch recreateSearcherDuration = null;
 515
 516            if (writePerformanceStats != null)
 517            {
 518                extensionExecutionDuration = new Stopwatch();
 519                flushToDiskDuration = new Stopwatch();
 520                recreateSearcherDuration = new Stopwatch();
 521            }
 522
 523            PreviousIndexTime = LastIndexTime;
 524            LastIndexTime = SystemTime.UtcNow;
 525
 526            lock (writeLock)
 527            {
 528                bool shouldRecreateSearcher;
 529                var toDispose = new List<Action>();
 530                Analyzer searchAnalyzer = null;
 531                var itemsInfo = new IndexedItemsInfo(null);
 532                bool flushed = false;
 533
 534                try
 535                {
 536                    waitReason = "Write";
 537                    try
 538                    {
 539                        searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
 540                    }
 541                    catch (Exception e)
 542                    {
 543                        context.AddError(indexId, indexDefinition.Name, "Creating Analyzer", e, "Analyzer");
 544                        throw;
 545                    }
 546
 547                    EnsureIndexWriter();
 548
 549                    var locker = directory.MakeLock("writing-to-index.lock");
 550                    try
 551                    {
 552                        var stats = new IndexingWorkStats();
 553
 554                        try
 555                        {
 556                            if (locker.Obtain() == false)
 557                            {
 558                                throw new InvalidOperationException(
 559                                    string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
 560                                                                                  PublicName));
 561                            }
 562
 563                            itemsInfo = action(indexWriter, searchAnalyzer, stats);
 564                            shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;
 565                            foreach (var indexExtension in indexExtensions.Values)
 566                            {
 567                                using (StopwatchScope.For(extensionExecutionDuration, resetBeforeStart: true))
 568                                {
 569                                    indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
 570                                }
 571
 572                                IndexingOperation operation;
 573                                if (writePerformanceStats != null && Enum.TryParse(string.Format("Extension_{0}", indexExtension.Name), out operation))
 574                                {
 575                                    writePerformanceStats.Add(PerformanceStats.From(operation, extensionExecutionDuration.ElapsedMilliseconds));
 576                                }
 577                            }
 578                        }
 579                        catch (Exception e)
 580                        {
 581                            var invalidSpatialShapeException = e as InvalidSpatialShapException;
 582                            var invalidDocId = (invalidSpatialShapeException == null) ?
 583                                                        null :
 584                                                        invalidSpatialShapeException.InvalidDocumentId;
 585                            context.AddError(indexId, indexDefinition.Name, invalidDocId, e, "Write");
 586                            throw;
 587                        }
 588
 589                        if (itemsInfo.ChangedDocs > 0)
 590                        {
 591                            using (StopwatchScope.For(flushToDiskDuration))
 592                            {
 593                                WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);
 594
 595                                if (indexWriter != null && indexWriter.RamSize() >= flushSize)
 596                                {
 597                                    Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk
 598                                    flushed = true;
 599                                }
 600                            }
 601
 602                            UpdateIndexingStats(context, stats);
 603                        }
 604                    }
 605                    finally
 606                    {
 607                        locker.Release();
 608                    }
 609                }
 610                catch (Exception e)
 611                {
 612                    throw new InvalidOperationException("Could not properly write to index " + PublicName, e);
 613                }
 614                finally
 615                {
 616                    currentlyIndexDocuments.Clear();
 617                    if (searchAnalyzer != null)
 618                        searchAnalyzer.Close();
 619                    foreach (Action dispose in toDispose)
 620                    {
 621                        dispose();
 622                    }
 623                    waitReason = null;
 624                    LastIndexTime = SystemTime.UtcNow;
 625                }
 626
 627                if (flushed)
 628                {
 629                    try
 630                    {
 631                        HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo());
 632                    }
 633                    catch (Exception e)
 634                    {
 635                        logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
 636                    }
 637                }
 638
 639                if (shouldRecreateSearcher)
 640                {
 641                    using (StopwatchScope.For(recreateSearcherDuration))
 642                    {
 643                        RecreateSearcher();
 644                    }
 645                }
 646            }
 647
 648            if (writePerformanceStats != null)
 649            {
 650                writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_FlushToDisk, flushToDiskDuration.ElapsedMilliseconds));
 651                writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_RecreateSearcher, recreateSearcherDuration.ElapsedMilliseconds));
 652            }
 653        }
 654
 655        private IndexSegmentsInfo GetCurrentSegmentsInfo()
 656        {
 657            if (directory is RAMDirectory)
 658                return null;
 659
 660            return IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory);
 661        }
 662
 663        protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo);
 664
 665        protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
 666        {
 667            // we'll try this for ten times, and if we get concurrency conflict, we do NOT fail, we'll retry
 668            // if we can't run even after ten times, we just give up. The stats might be a bit out, but that is fine for us
 669            bool run = true;
 670            for (int i = 0; i < 10 && run; i++)
 671            {
 672                run = false;
 673                switch (stats.Operation)
 674                {
 675                    case IndexingWorkStats.Status.Map:
 676                        workContext.TransactionalStorage.Batch(accessor =>
 677                        {
 678                            try
 679                            {
 680                                accessor.Indexing.UpdateIndexingStats(indexId, stats);
 681                            }
 682                            catch (Exception e)
 683                            {
 684                                if (accessor.IsWriteConflict(e))
 685                                {
 686                                    run = true;
 687                                    return;
 688                                }
 689                                throw;
 690                            }
 691                        });
 692                        break;
 693                    case IndexingWorkStats.Status.Reduce:
 694                        workContext.TransactionalStorage.Batch(accessor =>
 695                        {
 696                            try
 697                            {
 698                                accessor.Indexing.UpdateReduceStats(indexId, stats);
 699                            }
 700                            catch (Exception e)
 701                            {
 702                                if (accessor.IsWriteConflict(e))
 703                                {
 704                                    run = true;
 705                                    return;
 706                                }
 707                                throw;
 708                            }
 709                        });
 710                        break;
 711                    case IndexingWorkStats.Status.Ignore:
 712                        break;
 713                    default:
 714                        throw new ArgumentOutOfRangeException();
 715                }
 716                if (run)
 717                    Thread.Sleep(11);
 718            }
 719        }
 720
 721        private void CreateIndexWriter()
 722        {
 723            try
 724            {
 725                snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
 726                IndexWriter.IndexReaderWarmer indexReaderWarmer = context.IndexReaderWarmers != null
 727                    ? new IndexReaderWarmersWrapper(indexDefinition.Name, context.IndexReaderWarmers)
 728                    : null;
 729                indexWriter = new RavenIndexWriter(directory, stopAnalyzer, snapshotter, IndexWriter.MaxFieldLength.UNLIMITED, context.Configuration.Indexing.MaxWritesBeforeRecreate, indexReaderWarmer);
 730            }
 731            catch (Exception e)
 732            {
 733                HandleWriteError(e);
 734                throw new IOException("Failure to create index writer for " + PublicName, e);
 735            }
 736        }
 737
 738        internal void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
 739        {
 740            if (context.Configuration.Core.RunInMemory ||
 741                context.IndexDefinitionStorage == null) // may happen during index startup
 742                return;
 743
 744            var dir = indexWriter.Directory as RAMDirectory;
 745            if (dir == null)
 746                return;
 747
 748            var stale = IsUpToDateEnoughToWriteToDisk(highestETag) == false;
 749            var toobig = new Size(dir.SizeInBytes(), SizeUnit.Bytes) >= context.Configuration.Indexing.NewIndexInMemoryMaxSize;
 750
 751            var tooOld = (SystemTime.UtcNow - _indexCreationTime) > context.Configuration.Indexing.NewIndexInMemoryMaxTime.AsTimeSpan;
 752
 753            if (forceWriteToDisk || toobig || !stale || tooOld)
 754            {
 755                indexWriter.Commit(highestETag);
 756                var fsDir = context.IndexStorage.MakeRAMDirectoryPhysical(dir, indexDefinition);
 757                IndexStorage.WriteIndexVersion(fsDir, indexDefinition);
 758                directory = fsDir;
 759
 760                indexWriter.Dispose(true);
 761                dir.Dispose();
 762
 763                CreateIndexWriter();
 764
 765                ResetWriteErrors();
 766            }
 767        }
 768
        /// <summary>
        /// Implemented by derived index types. A <c>true</c> result is one of the conditions under which
        /// <c>WriteInMemoryIndexToDiskIfNecessary</c> moves the in-memory index to disk.
        /// </summary>
        /// <param name="highestETag">The etag handed over by <c>WriteInMemoryIndexToDiskIfNecessary</c>.</param>
        /// <returns><c>true</c> when the index may be written to disk on account of being up to date.</returns>
        protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
 770
 771        public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
 772        {
 773            toDispose.Add(defaultAnalyzer.Close);
 774
 775            string value;
 776            if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out value))
 777            {
 778                defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, value);
 779                toDispose.Add(defaultAnalyzer.Close);
 780            }
 781            var perFieldAnalyzerWrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);
 782            foreach (var analyzer in indexDefinition.Analyzers)
 783            {
 784                Analyzer analyzerInstance = IndexingExtensions.CreateAnalyzerInstance(analyzer.Key, analyzer.Value);
 785                toDispose.Add(analyzerInstance.Close);
 786
 787                if (forQuerying)
 788                {
 789                    var customAttributes = analyzerInstance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false);
 790                    if (customAttributes.Length > 0)
 791                        continue;
 792                }
 793
 794                perFieldAnalyzerWrapper.AddAnalyzer(analyzer.Key, analyzerInstance);
 795            }
 796            StandardAnalyzer standardAnalyzer = null;
 797            KeywordAnalyzer keywordAnalyzer = null;
 798            foreach (var fieldIndexing in indexDefinition.Indexes)
 799            {
 800                switch (fieldIndexing.Value)
 801                {
 802                    case FieldIndexing.NotAnalyzed:
 803                        if (keywordAnalyzer == null)
 804                        {
 805                            keywordAnalyzer = new KeywordAnalyzer();
 806                            toDispose.Add(keywordAnalyzer.Close);
 807                        }
 808                        perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
 809                        break;
 810                    case FieldIndexing.Analyzed:
 811                        if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
 812                            continue;
 813                        if (standardAnalyzer == null)
 814                        {
 815                            standardAnalyzer = new RavenStandardAnalyzer(Version.LUCENE_29);
 816                            //standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
 817                            toDispose.Add(standardAnalyzer.Close);
 818                        }
 819                        perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
 820                        break;
 821                }
 822            }
 823            return perFieldAnalyzerWrapper;
 824        }
 825
 826        protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
 827        {
 828            Action<Exception, object> onErrorFunc;
 829            return RobustEnumerationIndex(input, funcs, stats, out onErrorFunc, linqExecutionDuration);
 830        }
 831
 832        protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, out Action<Exception, object> onErrorFunc, Stopwatch linqExecutionDuration)
 833        {
 834            onErrorFunc = (exception, o) =>
 835                {
 836                    string docId = null;
 837
 838                    var invalidSpatialException = exception as InvalidSpatialShapException;
 839                    if (invalidSpatialException != null)
 840                        docId = invalidSpatialException.InvalidDocumentId;
 841
 842                    context.AddError(indexId, indexDefinition.Name, docId ?? TryGetDocKey(o), exception, "Map");
 843
 844                    logIndexing.WarnException(
 845                    String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, TryGetDocKey(o)), exception);
 846
 847                    stats.IndexingErrors++;
 848                };
 849
 850            return new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
 851                beforeMoveNext: () => Interlocked.Increment(ref stats.IndexingAttempts),
 852                cancelMoveNext: () => Interlocked.Decrement(ref stats.IndexingAttempts),
 853                onError: onErrorFunc)
 854                {
 855                    MoveNextDuration = linqExecutionDuration
 856                }
 857                .RobustEnumeration(input, funcs);
 858        }
 859
 860        protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
 861        {
 862            // not strictly accurate, but if we get that many errors, probably an error anyway.
 863            return new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
 864                beforeMoveNext: () => Interlocked.Increment(ref stats.ReduceAttempts),
 865                cancelMoveNext: () => Interlocked.Decrement(ref stats.ReduceAttempts),
 866                onError: (exception, o) =>
 867                {
 868                    var key = TryGetDocKey(o);
 869
 870                    context.AddError(indexId,
 871                        indexDefinition.Name,
 872                        key,
 873                        exception,
 874                        "Reduce"
 875                        );
 876                    logIndexing.WarnException(
 877                        String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name,
 878                            key),
 879                        exception);
 880
 881                    stats.ReduceErrors++;
 882                }) 
 883                {
 884                    MoveNextDuration = linqExecutionDuration
 885                }.RobustEnumeration(input, func);
 886        }
 887
 888        // we don't care about tracking map/reduce stats here, since it is merely
 889        // an optimization step
 890        protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func, Stopwatch reduceDuringMapLinqExecution)
 891        {
 892            // not strictly accurate, but if we get that many errors, probably an error anyway.
 893            return new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
 894                onError: (exception, o) =>
 895                {
 896                    var keys = TryGetDocKeys(input, o);
 897                    var concatenatedKeys = string.Join(";", keys);
 898
 899                    context.AddError(indexId,
 900                                     indexDefinition.Name,
 901                                    concatenatedKeys,
 902                                    exception,
 903                                    "Reduce"
 904                        );
 905
 906                    logIndexing.WarnException(
 907                        String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name,
 908                                        concatenatedKeys),
 909                        exception);
 910                })
 911                {
 912                    MoveNextDuration = reduceDuringMapLinqExecution
 913                }.RobustEnumeration(input, func);
 914        }
 915
 916        private static IEnumerable<string> TryGetDocKeys(IEnumerator<object> input, object current)
 917        {
 918            var keys = new HashSet<string>();
 919            var key = TryGetDocKey(current);
 920
 921            if (string.IsNullOrEmpty(key) == false)
 922                keys.Add(key);
 923            else
 924            {
 925                input.Reset();
 926                while (input.MoveNext())
 927                {
 928                    key = TryGetDocKey(input.Current);
 929                    if (string.IsNullOrEmpty(key))
 930                        continue;
 931
 932                    keys.Add(key);
 933                }
 934            }
 935
 936            return keys;
 937        }
 938
 939        public static string TryGetDocKey(object current)
 940        {
 941            var dic = current as DynamicJsonObject;
 942            if (dic == null)
 943                return null;
 944            object value = dic.GetValue(Constants.DocumentIdFieldName) ??
 945                           dic.GetValue(Constants.ReduceKeyFieldName);
 946            if (value != null)
 947                return value.ToString();
 948            return null;
 949        }
 950
        /// <summary>
        /// Removes the given keys from this index; the actual behaviour is supplied by derived index types.
        /// </summary>
        /// <param name="keys">Keys to remove (presumably document keys; confirm against the implementations).</param>
        /// <param name="context">Work context handed to the implementation.</param>
        public abstract void Remove(string[] keys, WorkContext context);
 952
 953        internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
 954        {
 955            return currentIndexSearcherHolder.GetCurrentStateHolder();
 956        }
 957
 958        internal IDisposable GetSearcher(out IndexSearcher searcher)
 959        {
 960            return currentIndexSearcherHolder.GetSearcher(out searcher);
 961        }
 962
 963        internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
 964        {
 965            return currentIndexSearcherHolder.GetSearcherAndTermDocs(out searcher, out termsDocs);
 966        }
 967
 968        private void RecreateSearcher()
 969        {
 970            if (indexWriter == null)
 971            {
 972                currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(directory, true), PublicName, wait: false);
 973            }
 974            else
 975            {
 976                var indexReader = indexWriter.GetReader();
 977                currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(indexReader), PublicName, wait: false);
 978            }
 979        }
 980
 981        protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
 982        {
 983            Analyzer newAnalyzer = AnalyzerGenerators.Aggregate(analyzer,
 984                                                                (currentAnalyzer, generator) =>
 985                                                                {
 986                                                                    Analyzer generateAnalyzer =
 987                                                                        generator.Value.GenerateAnalyzerForIndexing(PublicName, luceneDoc,
 988                                                                                                            currentAnalyzer);
 989                                                                    if (generateAnalyzer != currentAnalyzer &&
 990                                                                        currentAnalyzer != analyzer)
 991                                                                        currentAnalyzer.Close();
 992                                                                    return generateAnalyzer;
 993                                                                });
 994
 995            try
 996            {
 997                if (indexExtensions.Count > 0)
 998                    currentlyIndexDocuments.Add(CloneDocument(luceneDoc));
 999
1000                currentIndexWriter.AddDocument(luceneDoc, newAnalyzer);
1001
1002                foreach (var fieldable in luceneDoc.GetFields())
1003                {
1004                    using (fieldable.ReaderValue) // dispose all the readers
1005                    {
1006
1007                    }
1008                }
1009            }
1010            finally
1011            {
1012                if (newAnalyzer != analyzer)
1013                    newAnalyzer.Close();
1014            }
1015        }
1016
1017        public void MarkQueried()
1018        {
1019            lastQueryTime = SystemTime.UtcNow;
1020        }
1021
1022        public void MarkQueried(DateTime time)
1023        {
1024            if (lastQueryTime != null &&
1025                lastQueryTime.Value >= time)
1026                return;
1027
1028            lastQueryTime = time;
1029        }
1030
1031        public IIndexExtension GetExtension(string indexExtensionKey)
1032        {
1033            IIndexExtension val;
1034            indexExtensions.TryGetValue(indexExtensionKey, out val);
1035            return val;
1036        }
1037
1038        public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
1039        {
1040            return indexExtensions.FirstOrDefault(x => x.Key.StartsWith(indexExtensionKeyPrefix)).Value;
1041        }
1042
        /// <summary>
        /// Registers <paramref name="extension"/> under <paramref name="indexExtensionKey"/> by calling
        /// <c>TryAdd</c> on <c>indexExtensions</c>.
        /// </summary>
        /// <remarks>
        /// The result of <c>TryAdd</c> is ignored. NOTE(review): if <c>indexExtensions</c> is a
        /// ConcurrentDictionary, a key that is already registered keeps its existing extension rather
        /// than being overwritten - confirm that this is intended for a method named "Set".
        /// </remarks>
        public void SetExtension(string indexExtensionKey, IIndexExtension extension)
        {
            indexExtensions.TryAdd(indexExtensionKey, extension);
        }
1047
1048        private static Document CloneDocument(Document luceneDoc)
1049        {
1050            var clonedDocument = new Document();
1051            foreach (AbstractField field in luceneDoc.GetFields())
1052            {
1053                var numericField = field as NumericField;
1054                if (numericField != null)
1055                {
1056                    var clonedNumericField = new NumericField(numericField.Name,
1057                                                            numericField.IsStored ? Field.Store.YES : Field.Store.NO,
1058                                                            numericField.IsIndexed);
1059                    var numericValue = numericField.NumericValue;
1060                    if (numericValue is int)
1061                    {
1062                        clonedNumericField.SetIntValue((int)numericValue);
1063                    }
1064                    else if (numericValue is long)
1065                    {
1066                        clonedNumericField.SetLongValue((long)numericValue);
1067                    }
1068                    else if (numericValue is double)
1069                    {
1070                        clonedNumericField.SetDoubleValue((double)numericValue);
1071                    }
1072                    else if (numericValue is float)
1073                    {
1074                        clonedNumericField.SetFloatValue((float)numericValue);
1075                    }
1076                    clonedDocument.Add(clonedNumericField);
1077                }
1078                else
1079                {
1080                    Field clonedField;
1081                    if (field.IsBinary)
1082                    {
1083                        clonedField = new Field(field.Name, field.GetBinaryValue(),
1084                                                field.IsStored ? Field.Store.YES : Field.Store.NO);
1085                    }
1086                    else if (field.StringValue != null)
1087                    {
1088                        clonedField = new Field(field.Name, field.StringValue,
1089                                                field.IsStored ? Field.Store.YES : Field.Store.NO,
1090                                                field.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS,
1091                                                field.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO);
1092                    }
1093                    else
1094                    {
1095                        //probably token stream, and we can't handle fields with token streams, so we skip this.
1096                        continue;
1097                    }
1098                    clonedDocument.Add(clonedField);
1099                }
1100            }
1101            return clonedDocument;
1102        }
1103
1104        protected void LogIndexedDocument(string key, Document luceneDoc)
1105        {
1106            if (!logIndexing.IsDebugEnabled)
1107                return;
1108
1109            var fieldsForLogging = luceneDoc.GetFields().Select(x => new
1110            {
1111                x.Name,
1112                Value = x.IsBinary ? "<binary>" : x.StringValue,
1113                Indexed = x.IsIndexed,
1114                Stored = x.IsStored,
1115            });
1116            var sb = new StringBuilder();
1117            foreach (var fieldForLogging in fieldsForLogging)
1118            {
1119                sb.Append("\t").Append(fieldForLogging.Name)
1120                    .Append(" ")
1121                    .Append(fieldForLogging.Indexed ? "I" : "-")
1122                    .Append(fieldForLogging.Stored ? "S" : "-")
1123                    .Append(": ")
1124                    .Append(fieldForLogging.Value)
1125                    .AppendLine();
1126            }
1127
1128            if (logIndexing.IsDebugEnabled)
1129                logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, PublicName,
1130                sb.ToString());
1131        }
1132
1133        [CLSCompliant(false)]
1134        public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
1135        {
1136            if (string.IsNullOrWhiteSpace(indexQuery.Query) == false)
1137            {
1138                HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery);
1139                foreach (string field in hashSet)
1140                {
1141                    string f = field;
1142                    if (f.EndsWith("_Range"))
1143                    {
1144                        f = f.Substring(0, f.Length - "_Range".Length);
1145                    }
1146                    if (viewGenerator.ContainsField(f) == false &&
1147                        viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
1148                        throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed");
1149                }
1150            }
1151            if (indexQuery.SortedFields != null)
1152            {
1153                foreach (SortedField sortedField in indexQuery.SortedFields)
1154                {
1155                    string field = sortedField.Field;
1156                    if (field == Constants.TemporaryScoreValue)
1157                        continue;
1158                    if (field.EndsWith("_Range"))
1159                    {
1160                        field = field.Substring(0, field.Length - "_Range".Length);
1161                    }
1162
1163                    if (field.StartsWith(Constants.RandomFieldName) || field.StartsWith(Constants.CustomSortFieldName))
1164                        continue;
1165
1166                    if (field.StartsWith(Constants.AlphaNumericFieldName))
1167                    {
1168                        field = SortFieldHelper.CustomField(field).Name;
1169                        if (string.IsNullOrEmpty(field))
1170                            throw new ArgumentException("Alpha numeric sorting requires a field name");
1171                    }
1172
1173                    if (viewGenerator.ContainsField(field) == false && !field.StartsWith(Constants.DistanceFieldName)
1174                            && viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
1175                        throw new ArgumentException("The field '" + field + "' is not indexed, cannot sort on fields that are not indexed");
1176                }
1177            }
1178        }
1179
1180        #region Nested type: IndexQueryOperation
1181
1182        public class IndexQueryOperation
1183        {
1184            FastVectorHighlighter highlighter;
1185            FieldQuery fieldQuery;
1186
1187            private readonly Stopwatch _queryParseDuration = new Stopwatch();
1188            private readonly IndexQuery indexQuery;
1189            private readonly Index parent;
1190            private readonly Func<IndexQueryResult, bool> shouldInclude

Large files are truncated, but you can click here to view the full file