PageRenderTime 8ms CodeModel.GetById 6ms app.highlight 46ms RepoModel.GetById 1ms app.codeStats 1ms

/Raven.Database/Indexing/Index.cs

Relevant Search: With Applications for Solr and Elasticsearch

'Chapter 4. Taming tokens'. If you want to know how to extract ideas rather than words this book is for you. Learn concepts of precision and recall, making trade-offs between them and controlling the specificity of matches. Amazon Affiliate Link
https://github.com/nwendel/ravendb
C# | 1734 lines | 1494 code | 192 blank | 48 comment | 242 complexity | 027b6e1fe2d4a5f61b7aceeee3216d69 MD5 | raw file

Large files are truncated; click here to view the full file

   1//-----------------------------------------------------------------------
   2// <copyright file="Index.cs" company="Hibernating Rhinos LTD">
   3//     Copyright (c) Hibernating Rhinos LTD. All rights reserved.
   4// </copyright>
   5//-----------------------------------------------------------------------
   6using System;
   7using System.Collections;
   8using System.Collections.Concurrent;
   9using System.Collections.Generic;
  10using System.Collections.Specialized;
  11using System.ComponentModel.Composition;
  12using System.Diagnostics;
  13using System.IO;
  14using System.Linq;
  15using System.Text;
  16using System.Threading;
  17using System.Threading.Tasks;
  18using Lucene.Net.Analysis;
  19using Lucene.Net.Analysis.Standard;
  20using Lucene.Net.Documents;
  21using Lucene.Net.Index;
  22using Lucene.Net.Search;
  23using Lucene.Net.Search.Vectorhighlight;
  24using Lucene.Net.Store;
  25using Raven.Abstractions;
  26using Raven.Abstractions.Data;
  27using Raven.Abstractions.Exceptions;
  28using Raven.Abstractions.Extensions;
  29using Raven.Abstractions.Indexing;
  30using Raven.Abstractions.Linq;
  31using Raven.Abstractions.Logging;
  32using Raven.Abstractions.MEF;
  33using Raven.Database.Data;
  34using Raven.Database.Extensions;
  35using Raven.Database.Linq;
  36using Raven.Database.Plugins;
  37using Raven.Database.Storage;
  38using Raven.Database.Tasks;
  39using Raven.Database.Util;
  40using Raven.Json.Linq;
  41using Directory = Lucene.Net.Store.Directory;
  42using Document = Lucene.Net.Documents.Document;
  43using Field = Lucene.Net.Documents.Field;
  44using Version = Lucene.Net.Util.Version;
  45
  46namespace Raven.Database.Indexing
  47{
  48	/// <summary>
  49	/// 	This is a thread safe, single instance for a particular index.
  50	/// </summary>
  51	public abstract class Index : IDisposable
  52	{
  53		protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  54		protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");
  55		private readonly List<Document> currentlyIndexDocuments = new List<Document>();
  56		protected Directory directory;
  57		protected readonly IndexDefinition indexDefinition;
  58		private volatile string waitReason;
  59
  60		public IndexingPriority Priority { get; set; }
  61
  62		/// <summary>
   63	/// Note, this might be written by multiple threads at the same time
  64		/// We don't actually care for exact timing, it is more about general feeling
  65		/// </summary>
  66		private DateTime? lastQueryTime;
  67
  68		private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
  69			new ConcurrentDictionary<string, IIndexExtension>();
  70
  71		internal readonly int indexId;
  72
  73	    public int IndexId
  74	    {
  75	        get { return indexId; }
  76	    }
  77
  78		private readonly AbstractViewGenerator viewGenerator;
  79		protected readonly WorkContext context;
  80
  81		private readonly object writeLock = new object();
  82		private volatile bool disposed;
  83		private RavenIndexWriter indexWriter;
  84		private SnapshotDeletionPolicy snapshotter;
  85		private readonly IndexSearcherHolder currentIndexSearcherHolder;
  86
  87		private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing = new ConcurrentDictionary<string, IndexingPerformanceStats>();
  88		private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats = new ConcurrentQueue<IndexingPerformanceStats>();
  89		private readonly static StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);
  90		private bool forceWriteToDisk;
  91
		/// <summary>
		/// Creates the index wrapper over an existing Lucene directory and opens the
		/// initial searcher. The directory, definition and view generator are required.
		/// </summary>
		protected Index(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
		{
			// Validate arguments before any of them are used; the original built the
			// searcher holder (which captures 'context') ahead of the null checks.
			if (directory == null) throw new ArgumentNullException("directory");
			if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
			if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

			currentIndexSearcherHolder = new IndexSearcherHolder(id, context);

			this.indexId = id;
			this.indexDefinition = indexDefinition;
			this.viewGenerator = viewGenerator;
			this.context = context;
			logIndexing.Debug("Creating index for {0}", indexId);
			this.directory = directory;

			RecreateSearcher();
		}
 108
 109		[ImportMany]
 110		public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }
 111
 112		/// <summary>
  113	/// Whether this is a map reduce index or not
 114		/// </summary>
 115		public abstract bool IsMapReduce { get; }
 116
 117		public DateTime? LastQueryTime
 118		{
 119			get
 120			{
 121				return lastQueryTime;
 122			}
 123		}
 124
 125		public DateTime LastIndexTime { get; set; }
 126
 127		protected DateTime PreviousIndexTime { get; set; }
 128
		/// <summary>
		/// "true (&lt;human-readable size&gt;)" when the index is backed by a RAMDirectory,
		/// otherwise "false" (also "false" when the RAM directory was already closed).
		/// </summary>
		public string IsOnRam
		{
			get
			{
				var ramDirectory = directory as RAMDirectory;
				if (ramDirectory == null)
					return "false";
				try
				{
					return "true (" + SizeHelper.Humane(ramDirectory.SizeInBytes()) + ")";
				}
				catch (AlreadyClosedException)
				{
					// Racing with Dispose: treat a closed RAM directory as not-on-RAM.
					return "false";
				}
			}
		}
 146
 147	    public string PublicName { get { return this.indexDefinition.Name; } }
 148
 149		public volatile bool IsMapIndexingInProgress;
 150
 151		protected void RecordCurrentBatch(string indexingStep, int size)
 152		{
 153			var performanceStats = new IndexingPerformanceStats
 154			{
 155				InputCount = size, 
 156				Operation = indexingStep,
 157				Started = SystemTime.UtcNow,
 158			};
 159			currentlyIndexing.AddOrUpdate(indexingStep, performanceStats, (s, stats) => performanceStats);
 160		}
 161
 162		protected void BatchCompleted(string indexingStep)
 163		{
 164			IndexingPerformanceStats value;
 165			currentlyIndexing.TryRemove(indexingStep, out value);
 166		}
 167
 168		protected void AddindexingPerformanceStat(IndexingPerformanceStats stats)
 169		{
 170			indexingPerformanceStats.Enqueue(stats);
 171			while (indexingPerformanceStats.Count > 25)
 172				indexingPerformanceStats.TryDequeue(out stats);
 173		}
 174
		/// <summary>
		/// Shuts the index down: blocks until the write lock is available, detaches the
		/// searcher, persists an in-memory index if needed, then disposes the writer,
		/// its analyzer, and the directory. Each disposal step is isolated so one
		/// failure does not prevent the rest of the shutdown.
		/// </summary>
		public void Dispose()
		{
			try
			{
				// this is here so we can give good logs in the case of a long shutdown process
				if (Monitor.TryEnter(writeLock, 100) == false)
				{
					var localReason = waitReason;
					if (localReason != null)
						logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
						 localReason, indexId);

					Monitor.Enter(writeLock);
				}

				disposed = true;

				foreach (var indexExtension in indexExtensions)
				{
					indexExtension.Value.Dispose();
				}

				if (currentIndexSearcherHolder != null)
				{
					// Detach the searcher and give in-flight queries a grace period to finish.
					var item = currentIndexSearcherHolder.SetIndexSearcher(null, wait: true);
					if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
					{
						logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
					}
				}

				if (indexWriter != null)
				{
					try
					{
						// Persist a RAM-resident index before shutdown so its data survives.
						ForceWriteToDisk();
						WriteInMemoryIndexToDiskIfNecessary(Etag.Empty);
					}
					catch (Exception e)
					{
						logIndexing.ErrorException("Error while writing in memory index to disk.", e);
					}
				}

				if (indexWriter != null) // just in case, WriteInMemoryIndexToDiskIfNecessary recreates writer
				{
					var writer = indexWriter;
					indexWriter = null;

					try
					{
						writer.Analyzer.Close();
					}
					catch (Exception e)
					{
						logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
					}

					try
					{
						writer.Dispose();
					}
					catch (Exception e)
					{
						logIndexing.ErrorException("Error when closing the index", e);
					}
				}

				try
				{
					directory.Dispose();
				}
				catch (Exception e)
				{
					logIndexing.ErrorException("Error when closing the directory", e);
				}
			}
			finally
			{
				Monitor.Exit(writeLock);
			}
		}
 257
 258		public void Flush(Etag highestETag)
 259		{
 260			lock (writeLock)
 261			{
 262				if (disposed)
 263					return;
 264				if (indexWriter == null)
 265					return;
 266
 267				try
 268				{
 269					waitReason = "Flush";
 270                    indexWriter.Commit(highestETag);
 271				}
 272				finally
 273				{
 274					waitReason = null;
 275				}
 276			}
 277		}
 278
 279		public void MergeSegments()
 280		{
 281			lock (writeLock)
 282			{
 283				waitReason = "Merge / Optimize";
 284				try
 285				{
 286					logIndexing.Info("Starting merge of {0}", indexId);
 287					var sp = Stopwatch.StartNew();
 288					if (indexWriter == null)
 289					{
 290						CreateIndexWriter();
 291					}
 292					indexWriter.Optimize();
 293					logIndexing.Info("Done merging {0} - took {1}", indexId, sp.Elapsed);
 294				}
 295				finally
 296				{
 297					waitReason = null;
 298				}
 299			}
 300		}
 301
 302		public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);
 303
 304		protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
 305		{
 306			return new IndexQueryResult
 307			{
 308				Score = score.Score,
 309				Key = document.Get(Constants.DocumentIdFieldName),
 310				Projection = (fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields) ? CreateDocumentFromFields(document, fieldsToFetch) : null
 311			};
 312		}
 313
		/// <summary>
		/// Materializes a projection (RavenJObject) from the stored fields of a Lucene
		/// document, honoring FieldsToFetch and stripping internal helper fields.
		/// </summary>
		public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
		{
			var documentFromFields = new RavenJObject();
			var fields = fieldsToFetch.Fields;
			if (fieldsToFetch.FetchAllStoredFields)
				fields = fields.Concat(document.GetFields().Select(x => x.Name));


			// Companion fields (_IsArray / _Range / _ConvertToJson) are indexing-time
			// metadata and must not leak into the projection.
			var q = fields
				.Distinct()
				.SelectMany(name => document.GetFields(name) ?? new Field[0])
				.Where(x => x != null)
				.Where(
					x =>
					x.Name.EndsWith("_IsArray") == false &&
					x.Name.EndsWith("_Range") == false &&
					x.Name.EndsWith("_ConvertToJson") == false)
				.Select(fld => CreateProperty(fld, document))
				.GroupBy(x => x.Key)
				.Select(g =>
				{
					// A single value stays scalar unless the "_IsArray" marker says the
					// original property was an array (possibly a one-element one).
					if (g.Count() == 1 && document.GetField(g.Key + "_IsArray") == null)
					{
						return g.First();
					}
					var ravenJTokens = g.Select(x => x.Value).ToArray();
					return new KeyValuePair<string, RavenJToken>(g.Key, new RavenJArray((IEnumerable)ravenJTokens));
				});
			foreach (var keyValuePair in q)
			{
				documentFromFields.Add(keyValuePair.Key, keyValuePair.Value);
			}
			return documentFromFields;
		}
 348
		/// <summary>
		/// Finds every index entry matching <paramref name="term"/> and fires the
		/// OnIndexEntryDeleted trigger for each, logging (but not propagating) trigger
		/// errors. Skipped entirely when no batcher needs the deleted document.
		/// </summary>
		protected void InvokeOnIndexEntryDeletedOnAllBatchers(List<AbstractIndexUpdateTriggerBatcher> batchers, Term term)
		{
			if (!batchers.Any(batcher => batcher.RequiresDocumentOnIndexEntryDeleted)) return;
			// find all documents
			var key = term.Text;

			IndexSearcher searcher = null;
			using (GetSearcher(out searcher))
			{
				var collector = new GatherAllCollector();
				searcher.Search(new TermQuery(term), collector);
				var topDocs = collector.ToTopDocs();

				foreach (var scoreDoc in topDocs.ScoreDocs)
				{
					var document = searcher.Doc(scoreDoc.Doc);
					batchers.ApplyAndIgnoreAllErrors(
						exception =>
						{
							logIndexing.WarnException(
								string.Format(
									"Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
									indexId, key),
								exception);
							context.AddError(indexId, key, exception.Message, "OnIndexEntryDeleted Trigger");
						},
						trigger => trigger.OnIndexEntryDeleted(key, document));
				}
			}
		}
 379
 380		private static KeyValuePair<string, RavenJToken> CreateProperty(Field fld, Document document)
 381		{
 382			if (fld.IsBinary)
 383				return new KeyValuePair<string, RavenJToken>(fld.Name, fld.GetBinaryValue());
 384			var stringValue = fld.StringValue;
 385			if (document.GetField(fld.Name + "_ConvertToJson") != null)
 386			{
 387				var val = RavenJToken.Parse(fld.StringValue) as RavenJObject;
 388				return new KeyValuePair<string, RavenJToken>(fld.Name, val);
 389			}
 390			if (stringValue == Constants.NullValue)
 391				stringValue = null;
 392			if (stringValue == Constants.EmptyString)
 393				stringValue = string.Empty;
 394			return new KeyValuePair<string, RavenJToken>(fld.Name, stringValue);
 395		}
 396
		/// <summary>
		/// Core write path: runs the supplied indexing action against the Lucene writer
		/// under the write lock, with per-index analyzers, then flushes, records commit
		/// points and refreshes the searcher when documents actually changed.
		/// </summary>
		/// <param name="action">Performs the actual indexing; returns info about the items written.</param>
		protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action)
		{
			if (disposed)
				throw new ObjectDisposedException("Index " + PublicName + " has been disposed");

			PreviousIndexTime = LastIndexTime;
			LastIndexTime = SystemTime.UtcNow;

			lock (writeLock)
			{
				bool shouldRecreateSearcher;
				var toDispose = new List<Action>();
				Analyzer searchAnalyzer = null;
				var itemsInfo = new IndexedItemsInfo(null);

				try
				{
					waitReason = "Write";
					try
					{
						searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
					}
					catch (Exception e)
					{
						context.AddError(indexId, indexDefinition.Name, "Creating Analyzer", e.ToString(), "Analyzer");
						throw;
					}

					if (indexWriter == null)
					{
						CreateIndexWriter();
					}

					// File-level lock guarding against a concurrent writer on this directory.
					var locker = directory.MakeLock("writing-to-index.lock");
					try
					{
						var stats = new IndexingWorkStats();

						try
						{
							if (locker.Obtain() == false)
							{
								throw new InvalidOperationException(
									string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
																			  PublicName));
							}

							itemsInfo = action(indexWriter, searchAnalyzer, stats);
							shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;
							foreach (var indexExtension in indexExtensions.Values)
							{
								indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
							}
						}
						catch (Exception e)
						{
							// Spatial shape failures know which document was at fault; surface it.
							var invalidSpatialShapeException = e as InvalidSpatialShapeException;
							var invalidDocId = (invalidSpatialShapeException == null) ?
														null :
														invalidSpatialShapeException.InvalidDocumentId;
							context.AddError(indexId, indexDefinition.Name, invalidDocId, e.ToString(), "Write");
							throw;
						}

						if (itemsInfo.ChangedDocs > 0)
						{
							WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);
							Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk

							UpdateIndexingStats(context, stats);
						}
					}
					finally
					{
						locker.Release();
					}
				}
				catch (Exception e)
				{
					throw new InvalidOperationException("Could not properly write to index " + PublicName, e);
				}
				finally
				{
					// Always release analyzers and reset state, even on failure.
					currentlyIndexDocuments.Clear();
					if (searchAnalyzer != null)
						searchAnalyzer.Close();
					foreach (Action dispose in toDispose)
					{
						dispose();
					}
					waitReason = null;
					LastIndexTime = SystemTime.UtcNow;
				}

				try
				{
					HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo());
				}
				catch (Exception e)
				{
					logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
				}

				if (shouldRecreateSearcher)
					RecreateSearcher();
			}
		}
 504
 505		private IndexSegmentsInfo GetCurrentSegmentsInfo()
 506		{
 507			if (directory is RAMDirectory)
 508				return null;
 509
 510			return IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory);
 511		}
 512
 513		protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo);
 514
 515		protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
 516		{
 517			switch (stats.Operation)
 518			{
 519				case IndexingWorkStats.Status.Map:
 520					workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateIndexingStats(indexId, stats));
 521					break;
 522				case IndexingWorkStats.Status.Reduce:
 523					workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateReduceStats(indexId, stats));
 524					break;
 525				case IndexingWorkStats.Status.Ignore:
 526					break;
 527				default:
 528					throw new ArgumentOutOfRangeException();
 529			}
 530		}
 531
 532		private void CreateIndexWriter()
 533		{
 534			snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
 535		    IndexWriter.IndexReaderWarmer indexReaderWarmer = context.IndexReaderWarmers != null
 536		                                                          ? new IndexReaderWarmersWrapper(indexDefinition.Name, context.IndexReaderWarmers)
 537		                                                          : null;
 538			indexWriter = new RavenIndexWriter(directory, stopAnalyzer, snapshotter, IndexWriter.MaxFieldLength.UNLIMITED, context.Configuration.MaxIndexWritesBeforeRecreate, indexReaderWarmer);
 539		}
 540
 541		private void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
 542		{
 543			if (context.Configuration.RunInMemory ||
 544				context.IndexDefinitionStorage == null) // may happen during index startup
 545				return;
 546
 547			var dir = indexWriter.Directory as RAMDirectory;
 548			if (dir == null)
 549				return;
 550
 551			var stale = IsUpToDateEnoughToWriteToDisk(highestETag) == false;
 552			var toobig = dir.SizeInBytes() >= context.Configuration.NewIndexInMemoryMaxBytes;
 553
 554			if (forceWriteToDisk || toobig || !stale)
 555			{
 556				indexWriter.Commit(highestETag);
 557				var fsDir = context.IndexStorage.MakeRAMDirectoryPhysical(dir, indexDefinition);
 558				IndexStorage.WriteIndexVersion(fsDir, indexDefinition);
 559				directory = fsDir;
 560
 561				indexWriter.Dispose(true);
 562				dir.Dispose();
 563
 564				CreateIndexWriter();
 565			}
 566		}
 567
 568		protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
 569
		/// <summary>
		/// Builds the per-field analyzer wrapper for this index: custom analyzers from
		/// the index definition, keyword analysis for NotAnalyzed fields and standard
		/// analysis for Analyzed fields without an explicit analyzer. Every analyzer
		/// created here is registered in <paramref name="toDispose"/> for later cleanup.
		/// </summary>
		/// <param name="forQuerying">When true, analyzers marked [NotForQuerying] are skipped.</param>
		public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
		{
			toDispose.Add(defaultAnalyzer.Close);

			string value;
			if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out value))
			{
				// An analyzer registered for "all fields" replaces the supplied default.
				defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, value);
				toDispose.Add(defaultAnalyzer.Close);
			}
			var perFieldAnalyzerWrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);
			foreach (var analyzer in indexDefinition.Analyzers)
			{
				Analyzer analyzerInstance = IndexingExtensions.CreateAnalyzerInstance(analyzer.Key, analyzer.Value);
				toDispose.Add(analyzerInstance.Close);

				if (forQuerying)
				{
					var customAttributes = analyzerInstance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false);
					if (customAttributes.Length > 0)
						continue;
				}

				perFieldAnalyzerWrapper.AddAnalyzer(analyzer.Key, analyzerInstance);
			}
			// Lazily-created analyzers shared by all fields configured only via FieldIndexing.
			StandardAnalyzer standardAnalyzer = null;
			KeywordAnalyzer keywordAnalyzer = null;
			foreach (var fieldIndexing in indexDefinition.Indexes)
			{
				switch (fieldIndexing.Value)
				{
					case FieldIndexing.NotAnalyzed:
						if (keywordAnalyzer == null)
						{
							keywordAnalyzer = new KeywordAnalyzer();
							toDispose.Add(keywordAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
						break;
					case FieldIndexing.Analyzed:
						if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
							continue;
						if (standardAnalyzer == null)
						{
							// NOTE(review): LUCENE_29 here while the writer's StopAnalyzer uses
							// LUCENE_30 - presumably deliberate back-compat; confirm before changing.
							standardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
							toDispose.Add(standardAnalyzer.Close);
						}
						perFieldAnalyzerWrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
						break;
				}
			}
			return perFieldAnalyzerWrapper;
		}
 623
 624		protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats)
 625		{
 626	        Action<Exception, object> onErrorFunc;
 627	        return RobustEnumerationIndex(input, funcs, stats, out onErrorFunc);
 628	    }
 629
 630	    protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats,out Action<Exception,object> onErrorFunc)
 631			{
 632            onErrorFunc = (exception, o) =>
 633				{
 634                string docId = null;
 635                var invalidSpatialException = exception as InvalidSpatialShapeException;
 636                if (invalidSpatialException != null)
 637                    docId = invalidSpatialException.InvalidDocumentId;
 638
 639	            context.AddError(indexId,
 640	                indexDefinition.Name,
 641                    docId ?? TryGetDocKey(o),
 642									exception.Message,
 643									"Map"
 644						);
 645                
 646					logIndexing.WarnException(
 647	                String.Format("Failed to execute indexing function on {0} on {1}", indexId,
 648										TryGetDocKey(o)),
 649						exception);
 650
 651					stats.IndexingErrors++;
 652	        };
 653	        return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch)
 654			{
 655				BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts),
 656				CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts),
 657                OnError = onErrorFunc
 658			}.RobustEnumeration(input, funcs);
 659		}
 660
 661		protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func,
 662															IStorageActionsAccessor actions,
 663			IndexingWorkStats stats)
 664		{
 665			// not strictly accurate, but if we get that many errors, probably an error anyway.
 666			return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch)
 667			{
 668				BeforeMoveNext = () => Interlocked.Increment(ref stats.ReduceAttempts),
 669				CancelMoveNext = () => Interlocked.Decrement(ref stats.ReduceAttempts),
 670				OnError = (exception, o) =>
 671				{
 672					context.AddError(indexId,
 673                                     indexDefinition.Name,
 674									TryGetDocKey(o),
 675									exception.Message,
 676									"Reduce"
 677						);
 678					logIndexing.WarnException(
 679						String.Format("Failed to execute indexing function on {0} on {1}", indexId,
 680										TryGetDocKey(o)),
 681						exception);
 682
 683					stats.ReduceErrors++;
 684				}
 685			}.RobustEnumeration(input, func);
 686		}
 687
 688		// we don't care about tracking map/reduce stats here, since it is merely
 689		// an optimization step
 690		protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func)
 691		{
 692			// not strictly accurate, but if we get that many errors, probably an error anyway.
 693			return new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch)
 694			{
 695				BeforeMoveNext = () => { }, // don't care
 696				CancelMoveNext = () => { }, // don't care
 697				OnError = (exception, o) =>
 698				{
 699					context.AddError(indexId,
 700                                     indexDefinition.Name,
 701									TryGetDocKey(o),
 702									exception.Message,
 703									"Reduce"
 704						);
 705					logIndexing.WarnException(
 706						String.Format("Failed to execute indexing function on {0} on {1}", indexId,
 707										TryGetDocKey(o)),
 708						exception);
 709				}
 710			}.RobustEnumeration(input, func);
 711		}
 712
 713		public static string TryGetDocKey(object current)
 714		{
 715			var dic = current as DynamicJsonObject;
 716			if (dic == null)
 717				return null;
 718		    object value = dic.GetValue(Constants.DocumentIdFieldName) ??
 719		                   dic.GetValue(Constants.ReduceKeyFieldName);
 720		    if (value != null)
 721			return value.ToString();
 722		    return null;
 723		}
 724
 725		public abstract void Remove(string[] keys, WorkContext context);
 726
		// Exposes the current (ref-counted) searcher holding state for advanced callers.
		internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
		{
			return currentIndexSearcherHolder.GetCurrentStateHolder();
		}
 731
		// Leases the current IndexSearcher; dispose the returned handle to release it.
		internal IDisposable GetSearcher(out IndexSearcher searcher)
		{
			return currentIndexSearcherHolder.GetSearcher(out searcher);
		}
 736
		// Leases the current searcher together with the holder's cached term documents.
		internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
		{
			return currentIndexSearcherHolder.GetSearcherAndTermDocs(out searcher, out termsDocs);
		}
 741
 742		private void RecreateSearcher()
 743		{
 744			if (indexWriter == null)
 745			{
 746				currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(directory, true), wait: false);
 747			}
 748			else
 749			{
 750				var indexReader = indexWriter.GetReader();
 751				currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(indexReader), wait: false);
 752			}
 753		}
 754
		/// <summary>
		/// Adds a single Lucene document to the writer, letting analyzer generators wrap
		/// or replace the analyzer, and disposing any reader-valued fields afterwards.
		/// </summary>
		protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
		{
			// Let each generator wrap the analyzer in turn; close intermediate analyzers
			// created along the way, but never the caller-supplied one.
			Analyzer newAnalyzer = AnalyzerGenerators.Aggregate(analyzer,
																(currentAnalyzer, generator) =>
																{
																	Analyzer generateAnalyzer =
																		generator.Value.GenerateAnalyzerForIndexing(indexId.ToString(), luceneDoc,
																											currentAnalyzer);
																	if (generateAnalyzer != currentAnalyzer &&
																		currentAnalyzer != analyzer)
																		currentAnalyzer.Close();
																	return generateAnalyzer;
																});

			try
			{
				// Extensions receive a copy of indexed documents later; clone only when needed.
				if (indexExtensions.Count > 0)
					currentlyIndexDocuments.Add(CloneDocument(luceneDoc));

				currentIndexWriter.AddDocument(luceneDoc, newAnalyzer);

				foreach (var fieldable in luceneDoc.GetFields())
				{
					using (fieldable.ReaderValue) // dispose all the readers
					{
						
					}
				}
			}
			finally
			{
				if (newAnalyzer != analyzer)
					newAnalyzer.Close();
			}
		}
 790
 791		public void MarkQueried()
 792		{
 793			lastQueryTime = SystemTime.UtcNow;
 794		}
 795
		/// <summary>Records the given time as the last query time (approximate; concurrent writes are acceptable).</summary>
		public void MarkQueried(DateTime time)
		{
			lastQueryTime = time;
		}
 800
 801		public IIndexExtension GetExtension(string indexExtensionKey)
 802		{
 803			IIndexExtension val;
 804			indexExtensions.TryGetValue(indexExtensionKey, out val);
 805			return val;
 806		}
 807
		// Returns the first extension whose key starts with the prefix, or null when none matches.
		public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
		{
			return indexExtensions.FirstOrDefault(x => x.Key.StartsWith(indexExtensionKeyPrefix)).Value;
		}
 812
		// Registers an extension; an existing entry under the same key is kept (TryAdd semantics).
		public void SetExtension(string indexExtensionKey, IIndexExtension extension)
		{
			indexExtensions.TryAdd(indexExtensionKey, extension);
		}
 817
		/// <summary>
		/// Deep-copies a Lucene document (numeric, binary and string fields) so index
		/// extensions can consume it after the original is handed to the writer.
		/// Fields backed by token streams cannot be copied and are skipped.
		/// </summary>
		private static Document CloneDocument(Document luceneDoc)
		{
			var clonedDocument = new Document();
			foreach (AbstractField field in luceneDoc.GetFields())
			{
				var numericField = field as NumericField;
				if (numericField != null)
				{
					var clonedNumericField = new NumericField(numericField.Name,
															numericField.IsStored ? Field.Store.YES : Field.Store.NO,
															numericField.IsIndexed);
					var numericValue = numericField.NumericValue;
					// Preserve the exact numeric type of the original value.
					if (numericValue is int)
					{
						clonedNumericField.SetIntValue((int)numericValue);
					}
					else if (numericValue is long)
					{
						clonedNumericField.SetLongValue((long)numericValue);
					}
					else if (numericValue is double)
					{
						clonedNumericField.SetDoubleValue((double)numericValue);
					}
					else if (numericValue is float)
					{
						clonedNumericField.SetFloatValue((float)numericValue);
					}
					clonedDocument.Add(clonedNumericField);
				}
				else
				{
					Field clonedField;
					if (field.IsBinary)
					{
						clonedField = new Field(field.Name, field.GetBinaryValue(),
												field.IsStored ? Field.Store.YES : Field.Store.NO);
					}
					else if (field.StringValue != null)
					{
						clonedField = new Field(field.Name, field.StringValue,
												field.IsStored ? Field.Store.YES : Field.Store.NO,
												field.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS,
												field.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO);
					}
					else
					{
						//probably token stream, and we can't handle fields with token streams, so we skip this.
						continue;
					}
					clonedDocument.Add(clonedField);
				}
			}
			return clonedDocument;
		}
 873
	/// <summary>
	/// Emits a debug-level log line describing every field of the document that was
	/// just indexed for <paramref name="key"/>: field name, indexed flag (I/-),
	/// stored flag (S/-), and the value ("&lt;binary&gt;" for binary fields).
	/// Does nothing unless debug logging is enabled.
	/// </summary>
	protected void LogIndexedDocument(string key, Document luceneDoc)
	{
		if (logIndexing.IsDebugEnabled == false)
			return;

		var sb = new StringBuilder();
		foreach (IFieldable field in luceneDoc.GetFields().Cast<IFieldable>())
		{
			sb.Append("\t").Append(field.Name)
				.Append(" ")
				.Append(field.IsIndexed ? "I" : "-")
				.Append(field.IsStored ? "S" : "-")
				.Append(": ")
				.Append(field.IsBinary ? "<binary>" : field.StringValue)
				.AppendLine();
		}

		logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, indexId,
						sb.ToString());
	}
 901
	/// <summary>
	/// Validates that every field referenced by the query text and by its sort
	/// clauses is actually indexed, throwing <see cref="ArgumentException"/> for the
	/// first field that is not. An index containing the catch-all field "_" has
	/// dynamic field names, so any field is accepted in that case. The "_Range"
	/// suffix marks the synthetic numeric shadow of a field and is stripped before
	/// checking.
	/// </summary>
	public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
	{
		if (string.IsNullOrWhiteSpace(indexQuery.Query) == false)
		{
			foreach (string queryField in SimpleQueryParser.GetFields(indexQuery))
			{
				string fieldName = queryField;
				if (fieldName.EndsWith("_Range"))
					fieldName = fieldName.Substring(0, fieldName.Length - "_Range".Length);

				// the catch all field name means that we have dynamic fields names
				if (viewGenerator.ContainsField(fieldName) == false &&
					viewGenerator.ContainsField("_") == false)
					throw new ArgumentException("The field '" + fieldName + "' is not indexed, cannot query on fields that are not indexed");
			}
		}

		if (indexQuery.SortedFields == null)
			return;

		foreach (SortedField sortedField in indexQuery.SortedFields)
		{
			string fieldName = sortedField.Field;
			if (fieldName == Constants.TemporaryScoreValue)
				continue;
			if (fieldName.EndsWith("_Range"))
				fieldName = fieldName.Substring(0, fieldName.Length - "_Range".Length);
			if (fieldName.StartsWith(Constants.RandomFieldName))
				continue;

			// distance is computed at query time; "_" again means dynamic field names
			if (viewGenerator.ContainsField(fieldName) == false &&
				fieldName != Constants.DistanceFieldName &&
				viewGenerator.ContainsField("_") == false)
				throw new ArgumentException("The field '" + fieldName + "' is not indexed, cannot sort on fields that are not indexed");
		}
	}
 938
 939
 940
 941		#region Nested type: IndexQueryOperation
 942
 943		internal class IndexQueryOperation
 944		{
			// Highlighter state, created by SetupHighlighter only when the query asks
			// for highlighted fields; stays null otherwise.
			FastVectorHighlighter highlighter;
			FieldQuery fieldQuery;

			private readonly IndexQuery indexQuery;
			private readonly Index parent;
			// Caller-supplied predicate; results it rejects are counted as skipped.
			private readonly Func<IndexQueryResult, bool> shouldIncludeInResults;
			// For distinct queries only: projections already yielded (see ctor); null otherwise.
			private readonly HashSet<RavenJObject> alreadyReturned;
			private readonly FieldsToFetch fieldsToFetch;
			// Document ids already handed out on previous pages, to avoid paging duplicates.
			private readonly HashSet<string> documentsAlreadySeenInPreviousPage = new HashSet<string>();
			private readonly OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers;
			// When non-null, IndexEntries only yields entries whose reduce key is in this list.
			private readonly List<string> reduceKeys;

			/// <summary>
			/// Captures everything needed to run one query against the parent index.
			/// </summary>
			public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers, List<string> reduceKeys = null)
			{
				this.parent = parent;
				this.indexQuery = indexQuery;
				this.shouldIncludeInResults = shouldIncludeInResults;
				this.fieldsToFetch = fieldsToFetch;
				this.indexQueryTriggers = indexQueryTriggers;
				this.reduceKeys = reduceKeys;

				// only distinct queries need to track which projections were returned
				if (fieldsToFetch.IsDistinctQuery)
					alreadyReturned = new HashSet<RavenJObject>(new RavenJTokenEqualityComparer());
			}
 969
			/// <summary>
			/// Lazily yields the raw stored index entries (terms documents) matching the
			/// query, with the synthetic *_Range shadow fields stripped. When reduceKeys
			/// was supplied, only entries whose reduce-key field matches one of them are
			/// yielded. totalResults receives the total hit count before paging.
			/// </summary>
			public IEnumerable<RavenJObject> IndexEntries(Reference<int> totalResults)
			{
				parent.MarkQueried();
				using (IndexStorage.EnsureInvariantCulture())
				{
					AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
					IndexSearcher indexSearcher;
					RavenJObject[] termsDocs;
					using (parent.GetSearcherAndTermsDocs(out indexSearcher, out termsDocs))
					{
						var documentQuery = GetDocumentQuery();

						TopDocs search = ExecuteQuery(indexSearcher, documentQuery, indexQuery.Start, indexQuery.PageSize, indexQuery);
						totalResults.Value = search.TotalHits;

						for (int index = indexQuery.Start; index < search.ScoreDocs.Length; index++)
						{
							var scoreDoc = search.ScoreDocs[index];
							// clone so the cached terms document itself is not mutated below
							var ravenJObject = (RavenJObject)termsDocs[scoreDoc.Doc].CloneToken();
							// ToArray: we must not modify the object while enumerating it
							foreach (var prop in ravenJObject.Where(x => x.Key.EndsWith("_Range")).ToArray())
							{
								ravenJObject.Remove(prop.Key);
							}

							if (reduceKeys == null)
							yield return ravenJObject;
							else
							{
								RavenJToken reduceKeyValue;
								if (ravenJObject.TryGetValue(Constants.ReduceKeyFieldName, out reduceKeyValue) && reduceKeys.Any(x => reduceKeyValue.Equals(new RavenJValue(x))))
								{
									yield return ravenJObject;
						}
					}
				}
			}
				}
			}
1008
			/// <summary>
			/// Executes the query, lazily yielding up to PageSize results starting at
			/// Start. Because the include-predicate and duplicate/distinct checks can
			/// reject hits after Lucene returned them, the outer loop re-queries with a
			/// progressively larger page size until the page is filled or every hit in
			/// the index has been read. Throws if the index is in the Error state.
			/// </summary>
			public IEnumerable<IndexQueryResult> Query(CancellationToken token)
			{
			    if (parent.Priority.HasFlag(IndexingPriority.Error))
			        throw new IndexDisabledException("The index has been disabled due to errors");

				parent.MarkQueried();
				using (IndexStorage.EnsureInvariantCulture())
				{
					AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
					IndexSearcher indexSearcher;
					using (parent.GetSearcher(out indexSearcher))
					{
						var documentQuery = GetDocumentQuery();


						int start = indexQuery.Start;
						int pageSize = indexQuery.PageSize;
						int returnedResults = 0;
						int skippedResultsInCurrentLoop = 0;
						bool readAll;
						bool adjustStart = true;
						// tracks cross-page / distinct duplicates unless the caller
						// explicitly disabled duplicate checking
						DuplicateDocumentRecorder recorder = null;
						if (indexQuery.SkipDuplicateChecking == false)
							recorder = new DuplicateDocumentRecorder(indexSearcher, parent, documentsAlreadySeenInPreviousPage,
								alreadyReturned, fieldsToFetch, parent.IsMapReduce || fieldsToFetch.IsProjection);

						do
						{
							if (skippedResultsInCurrentLoop > 0)
							{
								start = start + pageSize - (start - indexQuery.Start); // need to "undo" the index adjustment
								// trying to guesstimate how many results we will need to read from the index
								// to get enough unique documents to match the page size
								pageSize = Math.Max(2, skippedResultsInCurrentLoop) * pageSize;
								skippedResultsInCurrentLoop = 0;
							}
							TopDocs search;
							int moreRequired = 0;
							// inner loop: the recorder may report that more docs must be
							// fetched to compensate for duplicates it has seen
							do
							{
								token.ThrowIfCancellationRequested(); 
								search = ExecuteQuery(indexSearcher, documentQuery, start, pageSize, indexQuery);

								if (recorder != null)
								{
								moreRequired = recorder.RecordResultsAlreadySeenForDistinctQuery(search, adjustStart, pageSize, ref start);
									pageSize += moreRequired*2;
								}
							} while (moreRequired > 0);

							indexQuery.TotalSize.Value = search.TotalHits;
							adjustStart = false;

							SetupHighlighter(documentQuery);

							for (var i = start; (i - start) < pageSize && i < search.ScoreDocs.Length; i++)
							{
								var scoreDoc = search.ScoreDocs[i];
								var document = indexSearcher.Doc(scoreDoc.Doc);
								var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc);
								if (ShouldIncludeInResults(indexQueryResult) == false)
								{
									indexQuery.SkippedResults.Value++;
									skippedResultsInCurrentLoop++;
									continue;
								}

								AddHighlighterResults(indexSearcher, scoreDoc, indexQueryResult);

								AddQueryExplanation(documentQuery, indexSearcher, scoreDoc, indexQueryResult);

								returnedResults++;
								yield return indexQueryResult;
								if (returnedResults == indexQuery.PageSize)
									yield break;
							}
							// readAll: every hit the index has was already examined
							readAll = search.TotalHits == search.ScoreDocs.Length;
						} while (returnedResults < indexQuery.PageSize && readAll == false);
					}
				}
			}
1090
			/// <summary>
			/// Attaches highlight fragments to a single result, using the highlighter
			/// prepared by SetupHighlighter (no-op when highlighting was not requested).
			/// For projections / map-reduce the fragments are written into the
			/// projection under each field's FragmentsField; otherwise they populate the
			/// result's Highligtings dictionary, keyed by field name. (Highligtings is
			/// the existing, misspelled property name on IndexQueryResult.)
			/// </summary>
			private void AddHighlighterResults(IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
			{
				if (highlighter == null)
					return;

				// deferred query: only fields that produced at least one fragment survive
				var highlightings =
					from highlightedField in this.indexQuery.HighlightedFields
					select new
					{
						highlightedField.Field,
						highlightedField.FragmentsField,
						Fragments = highlighter.GetBestFragments(
							fieldQuery,
							indexSearcher.IndexReader,
							scoreDoc.Doc,
							highlightedField.Field,
							highlightedField.FragmentLength,
							highlightedField.FragmentCount)
					}
						into fieldHighlitings
						where fieldHighlitings.Fragments != null &&
							  fieldHighlitings.Fragments.Length > 0
						select fieldHighlitings;

				if (fieldsToFetch.IsProjection || parent.IsMapReduce)
				{
					foreach (var highlighting in highlightings)
					{
						if (!string.IsNullOrEmpty(highlighting.FragmentsField))
						{
							indexQueryResult.Projection[highlighting.FragmentsField] = new RavenJArray(highlighting.Fragments);
						}
					}
				}
				else
				{
					indexQueryResult.Highligtings = highlightings.ToDictionary(x => x.Field, x => x.Fragments);
				}
			}
1130
1131			private void SetupHighlighter(Query documentQuery)
1132			{
1133				if (indexQuery.HighlightedFields != null && indexQuery.HighlightedFields.Length > 0)
1134				{
1135					highlighter = new FastVectorHighlighter(
1136						FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
1137						FastVectorHighlighter.DEFAULT_FIELD_MATCH,
1138						new SimpleFragListBuilder(),
1139						new SimpleFragmentsBuilder(
1140							indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
1141								? indexQuery.HighlighterPreTags
1142								: BaseFragmentsBuilder.COLORED_PRE_TAGS,
1143							indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
1144								? indexQuery.HighlighterPostTags
1145								: BaseFragmentsBuilder.COLORED_POST_TAGS));
1146
1147					fieldQuery = highlighter.GetFieldQuery(documentQuery);
1148				}
1149			}
1150
1151			private void AddQueryExplanation(Query documentQuery, IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
1152			{
1153				if(indexQuery.ExplainScores == false)
1154					return;
1155
1156				var explanation = indexSearcher.Explain(documentQuery, scoreDoc.Doc);
1157
1158				indexQueryResult.ScoreExplanation = explanation.ToString();
1159			}
1160
1161			private Query ApplyIndexTriggers(Query documentQuery)
1162			{
1163				documentQuery = indexQueryTriggers.Aggregate(documentQuery,
1164														   (current, indexQueryTrigger) =>
1165														   indexQueryTrigger.Value.ProcessQuery(parent.indexId.ToString(), current, indexQuery));
1166				return documentQuery;
1167			}
1168
			/// <summary>
			/// Executes an INTERSECT query. The first sub-query runs normally (so
			/// sorting and filtering are honored); each remaining sub-query is then
			/// intersected against it via the IntersectionCollector. Since intersection
			/// shrinks the candidate set unpredictably, the base page size is doubled
			/// and the search re-run until enough intersected results exist or no
			/// further progress is possible. Yields up to PageSize results from Start.
			/// </summary>
			public IEnumerable<IndexQueryResult> IntersectionQuery(CancellationToken token)
			{
				using (IndexStorage.EnsureInvariantCulture())
				{
					AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
					IndexSearcher indexSearcher;
					using (parent.GetSearcher(out indexSearcher))
					{
						var subQueries = indexQuery.Query.Split(new[] { Constants.IntersectSeparator }, StringSplitOptions.RemoveEmptyEntries);
						if (subQueries.Length <= 1)
							throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");

						//Not sure how to select the page size here??? The problem is that only docs in this search can be part 
						//of the final result because we're doing an intersection query (but we might exclude some of them)
						int pageSizeBestGuess = (indexQuery.Start + indexQuery.PageSize) * 2;
						int intersectMatches = 0, skippedResultsInCurrentLoop = 0;
						int previousBaseQueryMatches = 0, currentBaseQueryMatches = 0;

						var firstSubDocumentQuery = GetDocumentQuery(subQueries[0], indexQuery);

						//Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
						var search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
						currentBaseQueryMatches = search.ScoreDocs.Length;
						var intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);

						do
						{
							token.ThrowIfCancellationRequested();
							if (skippedResultsInCurrentLoop > 0)
							{
								// We get here because our first attempt didn't get enough docs (after INTERSECTION was calculated)
								pageSizeBestGuess = pageSizeBestGuess * 2;

								search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
								previousBaseQueryMatches = currentBaseQueryMatches;
								currentBaseQueryMatches = search.ScoreDocs.Length;
								intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
							}

							// intersect every remaining sub-query against the base results
							for (int i = 1; i < subQueries.Length; i++)
							{
								var luceneSubQuery = GetDocumentQuery(subQueries[i], indexQuery);
								indexSearcher.Search(luceneSubQuery, null, intersectionCollector);
							}

							var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
							intersectMatches = currentIntersectResults.Count;
							skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
						} while (intersectMatches < indexQuery.PageSize && //stop if we've got enough results to satisfy the pageSize
								 currentBaseQueryMatches < search.TotalHits && //stop if increasing the page size wouldn't make any difference
								 previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results

						var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
						//It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize, 
						//because it's before the INTERSECTION has been applied, so only some of those results make it out.
						//Trying to give an accurate answer is going to be too costly, so we aren't going to try.
						indexQuery.TotalSize.Value = search.TotalHits;
						indexQuery.SkippedResults.Value = skippedResultsInCurrentLoop;

						//Using the final set of results in the intersectionCollector
						int returnedResults = 0;
						for (int i = indexQuery.Start; i < intersectResults.Count && (i - indexQuery.Start) < pageSizeBestGuess; i++)
						{
							Document document = indexSearcher.Doc(intersectResults[i].LuceneId);
							IndexQueryResult indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
							if (ShouldIncludeInResults(indexQueryResult) == false)
							{
								indexQuery.SkippedResults.Value++;
								skippedResultsInCurrentLoop++;
								continue;
							}
							returnedResults++;
							yield return indexQueryResult;
							if (returnedResults == indexQuery.PageSize)
								yield break;
						}
					}
				}
			}
1248
1249			private bool ShouldIncludeInResults(IndexQueryResult indexQueryResult)
1250			{
1251				if (shouldIncludeInResults(indexQueryResult) == false)
1252					return false;
1253				if (documentsAlreadySeenInPreviousPage.Contains(indexQueryResult.Key))
1254					return false;
1255				if (fieldsToFetch.IsDistinctQuery && alreadyReturned.Add(indexQueryResult.Projection) == false)
1256					return false;
1257				return true;
1258			}
1259
			/// <summary>
			/// Seeds the duplicate-tracking sets before a paged query: records the
			/// document ids that appeared on the previous page so they are not returned
			/// again, and for distinct queries records every projection earlier pages
			/// already produced.
			/// NOTE(review): this private helper looks superseded by
			/// DuplicateDocumentRecorder.RecordResultsAlreadySeenForDistinctQuery —
			/// confirm whether it is still called before removing.
			/// </summary>
			private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start, int pageSize)
			{
				var min = Math.Min(start, search.TotalHits);

				// we are paging, we need to check that we don't have duplicates in the previous page
				// see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
				if (parent.IsMapReduce == false && fieldsToFetch.IsProjection == false && start - pageSize >= 0 && start < search.TotalHits)
				{
					for (int i = start - pageSize; i < min; i++)
					{
						var document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
						documentsAlreadySeenInPreviousPage.Add(document.Get(Constants.DocumentIdFieldName));
					}
				}

				if (fieldsToFetch.IsDistinctQuery == false)
					return;

				// add results that were already there in previous pages
				for (int i = 0; i < min; i++)
				{
					Document document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
					var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
					alreadyReturned.Add(indexQueryResult.Projection);
				}
			}
1286
1287			public Query GetDocumentQuery()
1288			{
1289				var q = GetDocumentQuery(indexQuery.Query, indexQuery);
1290				var spatialIndexQuery = indexQuery as SpatialIndexQuery;
1291				if (spatialIndexQuery != null)
1292				{
1293					var spatialField = parent.viewGenerator.GetSpatialField(spatialIndexQuery.SpatialFieldName);
1294					var dq = spatialField.MakeQuery(q, spatialField.GetStrategy(), spatialIndexQuery);
1295					if (q is MatchAllDocsQuery) return dq;
1296
1297					var bq = new BooleanQuery { { q, Occur.MUST }, { dq, Occur.MUST } };
1298					return bq;
1299				}
1300				return q;
1301			}
1302
			/// <summary>
			/// Builds a Lucene query from the given query text. An empty query matches
			/// all documents. Otherwise the text is parsed with a per-field analyzer
			/// built from a base LowerCaseKeywordAnalyzer, which each analyzer generator
			/// may replace; every superseded analyzer is disposed immediately and the
			/// final one in the finally block. Index query triggers are applied to the
			/// resulting query in both branches.
			/// </summary>
			private Query GetDocumentQuery(string query, IndexQuery indexQuery)
			{
				Query documentQuery;
				if (String.IsNullOrEmpty(query))
				{
					logQuerying.Debug("Issuing query on index {0} for all documents", parent.indexId);
					documentQuery = new MatchAllDocsQuery();
				}
				else
				{
					logQuerying.Debug("Issuing query on index {0} for: {1}", parent.indexId, query);
					// cleanup callbacks registered by CreateAnalyzer for every analyzer built
					var toDispose = new List<Action>();
					RavenPerFieldAnalyzerWrapper searchAnalyzer = null;
					try
					{
						searchAnalyzer = parent.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
						searchAnalyzer = parent.AnalyzerGenerators.Aggregate(searchAnalyzer, (currentAnalyzer, generator) =>
						{
							// a generator may return a replacement; dispose the analyzer it replaced
							Analyzer newAnalyzer = generator.GenerateAnalyzerForQuerying(parent.indexId.ToString(), indexQuery.Query, currentAnalyzer);
							if (newAnalyzer != currentAnalyzer)
							{
								DisposeAnalyzerAndFriends(toDispose, currentAnalyzer);
							}
							return parent.CreateAnalyzer(newAnalyzer, toDispose, true);
						});
						documentQuery = QueryBuilder.BuildQuery(query, indexQuery, searchAnalyzer);
					}
					finally
					{
						DisposeAnalyzerAndFriends(toDispose, searchAnalyzer);
					}
				}
				return ApplyIndexTriggers(documentQuery);
			}
1337
1338			private static void DisposeAnalyzerAndFriends(List<Action> toDispose, RavenPerFieldAnalyzerWrapper analyzer)
1339			{
1340				if (analyzer != null)
1341					analyzer.Close();
1342				foreach (Action dispose in toDispose)
1343				{
1344					dispose();
1345				}
1346				toDispose.Clear();
1347			}
1348
1349			private TopDocs ExecuteQuery(IndexSearcher indexSearcher, Query documentQuery, int start, int pageSize,
1350										IndexQuery indexQuery)
1351			{
1352				var sort = indexQuery.GetSort(parent.indexDefinition, parent.viewGenerator);
1353
1354				if (pageSize == Int32.MaxValue && sort == null) // we want all docs, no sorting required
1355				{
1356					var gatherAllCollector = new GatherAllCollector();
1357					indexSearcher.Search(documentQuery, gatherAllCollector);
1358					return gatherAllCollector.ToTopDocs();
1359				}
1360			    int absFullPage = Math.Abs(pageSize + start); // need to protect against ridiculously high values of pageSize + start that overflow
1361			    var minPageSize = Math.Max(absFullPage, 1);
1362
1363				// NOTE: We get Start + Pagesize results back so we have something to page on
1364				if (sort != null)
1365				{
1366					try
1367					{
1368						//indexSearcher.SetDefaultFieldSortScoring (sort.GetSort().Contains(SortField.FIELD_SCORE), false);
1369						indexSearcher.SetDefaultFieldSortScoring(true, false);
1370						var ret = indexSearcher.Search(documentQuery, null, minPageSize, sort);
1371						return ret;
1372					}
1373					finally
1374					{
1375						indexSearcher.SetDefaultFieldSortScoring(false, false);
1376					}
1377				}
1378				return indexSearcher.Search(documentQuery, null, minPageSize);
1379			}
1380		}
1381
1382		#endregion
1383
1384		public class DuplicateDocumentRecorder
1385		{
1386			private int min = -1;
1387			private readonly bool isProjectionOrMapReduce;
1388			private readonly Searchable indexSearcher;
1389			private readonly Index parent;
1390			private int alreadyScannedPositions, alreadyScannedPositionsForDistinct;
1391			private readonly HashSet<string> documentsAlreadySeenInPreviousPage;
1392			private readonly HashSet<RavenJObject> alreadyReturned;
1393			private readonly FieldsToFetch fieldsToFetch;
1394
1395			public DuplicateDocumentRecorder(Searchable indexSearcher,
1396				Index parent,
1397				HashSet<string> documentsAlreadySeenInPreviousPage,
1398				HashSet<RavenJObject> alreadyReturned,
1399				FieldsToFetch fieldsToFetch,
1400				bool isProjectionOrMapReduce)
1401		

Large files files are truncated, but you can click here to view the full file