PageRenderTime 66ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/Raven.Database/Indexing/Index.cs

https://github.com/nwendel/ravendb
C# | 1734 lines | 1494 code | 192 blank | 48 comment | 242 complexity | 027b6e1fe2d4a5f61b7aceeee3216d69 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="Index.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections;
  8. using System.Collections.Concurrent;
  9. using System.Collections.Generic;
  10. using System.Collections.Specialized;
  11. using System.ComponentModel.Composition;
  12. using System.Diagnostics;
  13. using System.IO;
  14. using System.Linq;
  15. using System.Text;
  16. using System.Threading;
  17. using System.Threading.Tasks;
  18. using Lucene.Net.Analysis;
  19. using Lucene.Net.Analysis.Standard;
  20. using Lucene.Net.Documents;
  21. using Lucene.Net.Index;
  22. using Lucene.Net.Search;
  23. using Lucene.Net.Search.Vectorhighlight;
  24. using Lucene.Net.Store;
  25. using Raven.Abstractions;
  26. using Raven.Abstractions.Data;
  27. using Raven.Abstractions.Exceptions;
  28. using Raven.Abstractions.Extensions;
  29. using Raven.Abstractions.Indexing;
  30. using Raven.Abstractions.Linq;
  31. using Raven.Abstractions.Logging;
  32. using Raven.Abstractions.MEF;
  33. using Raven.Database.Data;
  34. using Raven.Database.Extensions;
  35. using Raven.Database.Linq;
  36. using Raven.Database.Plugins;
  37. using Raven.Database.Storage;
  38. using Raven.Database.Tasks;
  39. using Raven.Database.Util;
  40. using Raven.Json.Linq;
  41. using Directory = Lucene.Net.Store.Directory;
  42. using Document = Lucene.Net.Documents.Document;
  43. using Field = Lucene.Net.Documents.Field;
  44. using Version = Lucene.Net.Util.Version;
  45. namespace Raven.Database.Indexing
  46. {
  47. /// <summary>
  48. /// This is a thread safe, single instance for a particular index.
  49. /// </summary>
  50. public abstract class Index : IDisposable
  51. {
  // Loggers for the indexing and the querying paths of every index.
  protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");

  // Analyzer handed to every RavenIndexWriter created by CreateIndexWriter.
  private static readonly StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);

  // Clones of the documents added during the current Write call; they are handed to the
  // index extensions and cleared when Write finishes.
  private readonly List<Document> currentlyIndexDocuments = new List<Document>();

  // Lucene directory backing this index; replaced when an in-memory index is moved to disk.
  protected Directory directory;
  protected readonly IndexDefinition indexDefinition;

  // Name of the operation currently holding writeLock (null when idle); read by Dispose
  // to explain why shutdown is waiting.
  private volatile string waitReason;

  public IndexingPriority Priority { get; set; }

  /// <summary>
  /// Note, this might be written to by multiple threads at the same time.
  /// We don't actually care for exact timing, it is more about general feeling
  /// </summary>
  private DateTime? lastQueryTime;

  private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
      new ConcurrentDictionary<string, IIndexExtension>();

  internal readonly int indexId;

  /// <summary>
  /// The numeric id of this index.
  /// </summary>
  public int IndexId
  {
      get
      {
          return indexId;
      }
  }

  private readonly AbstractViewGenerator viewGenerator;
  protected readonly WorkContext context;

  // Guards the index writer; taken by Write, Flush, MergeSegments and Dispose.
  private readonly object writeLock = new object();
  private volatile bool disposed;
  private RavenIndexWriter indexWriter;
  private SnapshotDeletionPolicy snapshotter;
  private readonly IndexSearcherHolder currentIndexSearcherHolder;

  // Batches currently in flight, keyed by indexing step name.
  private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing =
      new ConcurrentDictionary<string, IndexingPerformanceStats>();

  // Most recent performance samples; trimmed by AddindexingPerformanceStat.
  private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats =
      new ConcurrentQueue<IndexingPerformanceStats>();

  // When set, WriteInMemoryIndexToDiskIfNecessary persists the in-memory index
  // without consulting its size / staleness checks.
  private bool forceWriteToDisk;
  /// <summary>
  /// Creates the index over the supplied Lucene directory and opens its first searcher.
  /// </summary>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="directory"/>, <paramref name="indexDefinition"/> or
  /// <paramref name="viewGenerator"/> is null.
  /// </exception>
  protected Index(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
  {
      currentIndexSearcherHolder = new IndexSearcherHolder(id, context);

      if (directory == null)
      {
          throw new ArgumentNullException("directory");
      }
      if (indexDefinition == null)
      {
          throw new ArgumentNullException("indexDefinition");
      }
      if (viewGenerator == null)
      {
          throw new ArgumentNullException("viewGenerator");
      }

      indexId = id;
      this.indexDefinition = indexDefinition;
      this.viewGenerator = viewGenerator;
      this.context = context;

      logIndexing.Debug("Creating index for {0}", indexId);

      // The directory must be assigned before RecreateSearcher, which opens a searcher over it.
      this.directory = directory;
      RecreateSearcher();
  }
  /// <summary>
  /// Analyzer generators applied to each document in AddDocumentToIndex.
  /// </summary>
  [ImportMany]
  public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }

  /// <summary>
  /// Whether this is a map reduce index or not
  /// </summary>
  public abstract bool IsMapReduce { get; }

  /// <summary>
  /// The time recorded by the latest MarkQueried call, or null if none was recorded.
  /// </summary>
  public DateTime? LastQueryTime
  {
      get { return lastQueryTime; }
  }

  /// <summary>
  /// Updated by Write when it starts and again when it finishes.
  /// </summary>
  public DateTime LastIndexTime { get; set; }

  /// <summary>
  /// The value LastIndexTime had when the latest Write call started.
  /// </summary>
  protected DateTime PreviousIndexTime { get; set; }
  /// <summary>
  /// "true (size)" while the index is backed by an open RAMDirectory, otherwise "false".
  /// </summary>
  public string IsOnRam
  {
      get
      {
          var inMemoryDirectory = directory as RAMDirectory;
          if (inMemoryDirectory != null)
          {
              try
              {
                  return "true (" + SizeHelper.Humane(inMemoryDirectory.SizeInBytes()) + ")";
              }
              catch (AlreadyClosedException)
              {
                  // the directory was closed underneath us; report it as not being in memory
              }
          }

          return "false";
      }
  }
  /// <summary>
  /// The name taken from the index definition.
  /// </summary>
  public string PublicName
  {
      get
      {
          return indexDefinition.Name;
      }
  }

  public volatile bool IsMapIndexingInProgress;
  /// <summary>
  /// Registers the batch that is starting for the given indexing step, replacing any
  /// entry previously recorded for that step.
  /// </summary>
  /// <param name="indexingStep">Name of the step; used as the lookup key.</param>
  /// <param name="size">Number of input items in the batch.</param>
  protected void RecordCurrentBatch(string indexingStep, int size)
  {
      var batchStats = new IndexingPerformanceStats();
      batchStats.InputCount = size;
      batchStats.Operation = indexingStep;
      batchStats.Started = SystemTime.UtcNow;

      // add-or-overwrite: the newest batch always wins for a given step
      currentlyIndexing[indexingStep] = batchStats;
  }
  /// <summary>
  /// Removes the in-flight entry recorded for the given indexing step, if there is one.
  /// </summary>
  protected void BatchCompleted(string indexingStep)
  {
      IndexingPerformanceStats removed;
      currentlyIndexing.TryRemove(indexingStep, out removed);
  }
  /// <summary>
  /// Appends a performance sample and drops the oldest samples while more than 25 are kept.
  /// </summary>
  protected void AddindexingPerformanceStat(IndexingPerformanceStats stats)
  {
      indexingPerformanceStats.Enqueue(stats);

      IndexingPerformanceStats evicted;
      while (indexingPerformanceStats.Count > 25)
      {
          indexingPerformanceStats.TryDequeue(out evicted);
      }
  }
  /// <summary>
  /// Shuts the index down under the write lock: marks it disposed, disposes the index
  /// extensions, clears the current searcher, tries to persist an in-memory index and then
  /// closes the writer and the directory. Errors raised while persisting or while closing the
  /// analyzer, writer or directory are logged and do not stop the remaining steps.
  /// </summary>
  public void Dispose()
  {
      // Tracks whether this thread actually owns writeLock. Without it, an exception thrown
      // while acquiring the lock would reach the finally block, where Monitor.Exit would throw
      // SynchronizationLockException and hide the original failure.
      var lockTaken = false;
      try
      {
          // this is here so we can give good logs in the case of a long shutdown process
          Monitor.TryEnter(writeLock, 100, ref lockTaken);
          if (lockTaken == false)
          {
              var localReason = waitReason;
              if (localReason != null)
                  logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
                      localReason, indexId);

              Monitor.Enter(writeLock, ref lockTaken);
          }

          disposed = true;

          foreach (var indexExtension in indexExtensions)
          {
              indexExtension.Value.Dispose();
          }

          if (currentIndexSearcherHolder != null)
          {
              var item = currentIndexSearcherHolder.SetIndexSearcher(null, wait: true);
              if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
              {
                  logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
              }
          }

          if (indexWriter != null)
          {
              try
              {
                  ForceWriteToDisk();
                  WriteInMemoryIndexToDiskIfNecessary(Etag.Empty);
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error while writing in memory index to disk.", e);
              }
          }

          if (indexWriter != null) // just in case, WriteInMemoryIndexToDiskIfNecessary recreates writer
          {
              var writer = indexWriter;
              indexWriter = null;

              try
              {
                  writer.Analyzer.Close();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
              }

              try
              {
                  writer.Dispose();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error when closing the index", e);
              }
          }

          try
          {
              directory.Dispose();
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error when closing the directory", e);
          }
      }
      finally
      {
          if (lockTaken)
              Monitor.Exit(writeLock);
      }
  }
  /// <summary>
  /// Commits the pending changes of the index writer together with the given etag.
  /// Does nothing when the index is disposed or no writer has been created yet.
  /// </summary>
  public void Flush(Etag highestETag)
  {
      lock (writeLock)
      {
          if (disposed || indexWriter == null)
          {
              return;
          }

          waitReason = "Flush";
          try
          {
              indexWriter.Commit(highestETag);
          }
          finally
          {
              waitReason = null;
          }
      }
  }
  /// <summary>
  /// Optimizes (merges the segments of) the index under the write lock, creating the index
  /// writer first when none exists. Does nothing once the index has been disposed.
  /// </summary>
  public void MergeSegments()
  {
      lock (writeLock)
      {
          // Dispose clears the writer and disposes the directory under this same lock; without
          // this check a late merge would create a new writer over the disposed directory.
          if (disposed)
              return;

          waitReason = "Merge / Optimize";
          try
          {
              logIndexing.Info("Starting merge of {0}", indexId);
              var sp = Stopwatch.StartNew();

              if (indexWriter == null)
              {
                  CreateIndexWriter();
              }

              indexWriter.Optimize();
              logIndexing.Info("Done merging {0} - took {1}", indexId, sp.Elapsed);
          }
          finally
          {
              waitReason = null;
          }
      }
  }
  public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);

  /// <summary>
  /// Builds the query result for a single Lucene hit. The projection is only materialized
  /// when the query is a projection or asks for all stored fields; otherwise it is null.
  /// </summary>
  protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
  {
      var queryResult = new IndexQueryResult();
      queryResult.Score = score.Score;
      queryResult.Key = document.Get(Constants.DocumentIdFieldName);

      if (fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields)
      {
          queryResult.Projection = CreateDocumentFromFields(document, fieldsToFetch);
      }
      else
      {
          queryResult.Projection = null;
      }

      return queryResult;
  }
  /// <summary>
  /// Builds a JSON object from the stored fields of a Lucene document.
  /// </summary>
  /// <remarks>
  /// Fields whose names end with "_IsArray", "_Range" or "_ConvertToJson" are bookkeeping
  /// markers and are never emitted. A field that occurs more than once, or that has an
  /// accompanying "_IsArray" marker, is emitted as an array.
  /// </remarks>
  /// <param name="document">The Lucene document to read the fields from.</param>
  /// <param name="fieldsToFetch">
  /// The requested field names; when FetchAllStoredFields is set, every field of the
  /// document is included as well.
  /// </param>
  /// <returns>A new object holding one property per emitted field name.</returns>
  public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
  {
      var documentFromFields = new RavenJObject();
      var fields = fieldsToFetch.Fields;
      if (fieldsToFetch.FetchAllStoredFields)
          fields = fields.Concat(document.GetFields().Select(x => x.Name));

      var q = fields
          .Distinct()
          .SelectMany(name => document.GetFields(name) ?? new Field[0])
          .Where(x => x != null)
          .Where(
              x =>
                  // marker suffixes are fixed identifiers, so compare them ordinally rather
                  // than with the rules of whatever culture the calling thread runs under
                  x.Name.EndsWith("_IsArray", StringComparison.Ordinal) == false &&
                  x.Name.EndsWith("_Range", StringComparison.Ordinal) == false &&
                  x.Name.EndsWith("_ConvertToJson", StringComparison.Ordinal) == false)
          .Select(fld => CreateProperty(fld, document))
          .GroupBy(x => x.Key)
          .Select(g =>
          {
              if (g.Count() == 1 && document.GetField(g.Key + "_IsArray") == null)
              {
                  return g.First();
              }
              var ravenJTokens = g.Select(x => x.Value).ToArray();
              return new KeyValuePair<string, RavenJToken>(g.Key, new RavenJArray((IEnumerable)ravenJTokens));
          });

      foreach (var keyValuePair in q)
      {
          documentFromFields.Add(keyValuePair.Key, keyValuePair.Value);
      }
      return documentFromFields;
  }
  /// <summary>
  /// Notifies the trigger batchers about every index entry matching <paramref name="term"/>.
  /// Nothing is searched unless at least one batcher requires the deleted document.
  /// Errors raised by a trigger are logged and recorded on the work context; they do not
  /// stop the remaining triggers.
  /// </summary>
  /// <param name="batchers">The batchers to notify.</param>
  /// <param name="term">The term identifying the entries; its text is passed on as the key.</param>
  protected void InvokeOnIndexEntryDeletedOnAllBatchers(List<AbstractIndexUpdateTriggerBatcher> batchers, Term term)
  {
      if (!batchers.Any(batcher => batcher.RequiresDocumentOnIndexEntryDeleted)) return;

      // find all documents
      var key = term.Text;

      IndexSearcher searcher;
      using (GetSearcher(out searcher))
      {
          var collector = new GatherAllCollector();
          searcher.Search(new TermQuery(term), collector);
          var topDocs = collector.ToTopDocs();

          foreach (var scoreDoc in topDocs.ScoreDocs)
          {
              var document = searcher.Doc(scoreDoc.Doc);
              batchers.ApplyAndIgnoreAllErrors(
                  exception =>
                  {
                      logIndexing.WarnException(
                          string.Format(
                              "Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                              indexId, key),
                          exception);

                      // Pass the index name explicitly, as every other AddError call in this
                      // class does, so key / message / component each land in their own slot.
                      context.AddError(indexId, indexDefinition.Name, key, exception.Message, "OnIndexEntryDeleted Trigger");
                  },
                  trigger => trigger.OnIndexEntryDeleted(key, document));
          }
      }
  }
  /// <summary>
  /// Converts one stored Lucene field into a name/value pair: binary fields yield their bytes,
  /// fields with a "_ConvertToJson" marker are parsed as a JSON object, and the stored
  /// null / empty-string placeholders are mapped back to null / string.Empty.
  /// </summary>
  private static KeyValuePair<string, RavenJToken> CreateProperty(Field fld, Document document)
  {
      if (fld.IsBinary)
      {
          return new KeyValuePair<string, RavenJToken>(fld.Name, fld.GetBinaryValue());
      }

      string text = fld.StringValue;

      var storedAsJson = document.GetField(fld.Name + "_ConvertToJson") != null;
      if (storedAsJson)
      {
          // anything that does not parse to a JSON object yields a null value
          var parsed = RavenJToken.Parse(text) as RavenJObject;
          return new KeyValuePair<string, RavenJToken>(fld.Name, parsed);
      }

      if (text == Constants.NullValue)
      {
          text = null;
      }
      else if (text == Constants.EmptyString)
      {
          text = string.Empty;
      }

      return new KeyValuePair<string, RavenJToken>(fld.Name, text);
  }
  /// <summary>
  /// Runs <paramref name="action"/> against the index writer while holding the write lock and
  /// the "writing-to-index.lock" directory lock. When documents changed, the index is
  /// persisted / flushed, the indexing stats are updated and the searcher is recreated.
  /// </summary>
  /// <param name="action">
  /// Performs the actual indexing; receives the writer, the analyzer and a fresh stats object
  /// and returns information about the indexed items.
  /// </param>
  /// <exception cref="ObjectDisposedException">The index has already been disposed.</exception>
  /// <exception cref="InvalidOperationException">
  /// Any failure while writing; the original exception is available as the inner exception.
  /// </exception>
  protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action)
  {
      if (disposed)
      {
          throw new ObjectDisposedException("Index " + PublicName + " has been disposed");
      }

      PreviousIndexTime = LastIndexTime;
      LastIndexTime = SystemTime.UtcNow;

      lock (writeLock)
      {
          bool searcherIsOutdated;
          var cleanupActions = new List<Action>();
          Analyzer analyzer = null;
          var indexedInfo = new IndexedItemsInfo(null);

          try
          {
              waitReason = "Write";

              try
              {
                  analyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), cleanupActions);
              }
              catch (Exception analyzerError)
              {
                  context.AddError(indexId, indexDefinition.Name, "Creating Analyzer", analyzerError.ToString(), "Analyzer");
                  throw;
              }

              if (indexWriter == null)
              {
                  CreateIndexWriter();
              }

              var directoryLock = directory.MakeLock("writing-to-index.lock");
              try
              {
                  var workStats = new IndexingWorkStats();
                  try
                  {
                      if (!directoryLock.Obtain())
                      {
                          throw new InvalidOperationException(
                              string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
                                  PublicName));
                      }

                      indexedInfo = action(indexWriter, analyzer, workStats);
                      searcherIsOutdated = indexedInfo.ChangedDocs > 0;

                      foreach (var extension in indexExtensions.Values)
                      {
                          extension.OnDocumentsIndexed(currentlyIndexDocuments, analyzer);
                      }
                  }
                  catch (Exception writeError)
                  {
                      // an invalid spatial shape carries the id of the offending document
                      string invalidDocId = null;
                      var spatialError = writeError as InvalidSpatialShapeException;
                      if (spatialError != null)
                      {
                          invalidDocId = spatialError.InvalidDocumentId;
                      }

                      context.AddError(indexId, indexDefinition.Name, invalidDocId, writeError.ToString(), "Write");
                      throw;
                  }

                  if (indexedInfo.ChangedDocs > 0)
                  {
                      WriteInMemoryIndexToDiskIfNecessary(indexedInfo.HighestETag);
                      Flush(indexedInfo.HighestETag); // just make sure changes are flushed to disk
                      UpdateIndexingStats(context, workStats);
                  }
              }
              finally
              {
                  directoryLock.Release();
              }
          }
          catch (Exception failure)
          {
              throw new InvalidOperationException("Could not properly write to index " + PublicName, failure);
          }
          finally
          {
              currentlyIndexDocuments.Clear();

              if (analyzer != null)
              {
                  analyzer.Close();
              }

              foreach (var cleanup in cleanupActions)
              {
                  cleanup();
              }

              waitReason = null;
              LastIndexTime = SystemTime.UtcNow;
          }

          try
          {
              HandleCommitPoints(indexedInfo, GetCurrentSegmentsInfo());
          }
          catch (Exception commitPointError)
          {
              logIndexing.WarnException("Could not handle commit point properly, ignoring", commitPointError);
          }

          if (searcherIsOutdated)
          {
              RecreateSearcher();
          }
      }
  }
  /// <summary>
  /// Returns the current segments info of the on-disk index, or null while the index is
  /// still backed by a RAMDirectory.
  /// </summary>
  private IndexSegmentsInfo GetCurrentSegmentsInfo()
  {
      return directory is RAMDirectory
          ? null
          : IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory);
  }

  protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo);
  /// <summary>
  /// Persists the collected stats in a storage batch: map stats for a Map operation, reduce
  /// stats for a Reduce operation, nothing for Ignore.
  /// </summary>
  /// <exception cref="ArgumentOutOfRangeException">The operation is none of the known values.</exception>
  protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
  {
      var operation = stats.Operation;

      if (operation == IndexingWorkStats.Status.Map)
      {
          workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateIndexingStats(indexId, stats));
      }
      else if (operation == IndexingWorkStats.Status.Reduce)
      {
          workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateReduceStats(indexId, stats));
      }
      else if (operation == IndexingWorkStats.Status.Ignore)
      {
          // nothing to record
      }
      else
      {
          throw new ArgumentOutOfRangeException();
      }
  }
  /// <summary>
  /// Creates a new snapshot deletion policy and a new index writer over the current directory.
  /// A reader warmer is attached only when the context provides index reader warmers.
  /// </summary>
  private void CreateIndexWriter()
  {
      snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

      IndexWriter.IndexReaderWarmer readerWarmer = null;
      if (context.IndexReaderWarmers != null)
      {
          readerWarmer = new IndexReaderWarmersWrapper(indexDefinition.Name, context.IndexReaderWarmers);
      }

      indexWriter = new RavenIndexWriter(
          directory,
          stopAnalyzer,
          snapshotter,
          IndexWriter.MaxFieldLength.UNLIMITED,
          context.Configuration.MaxIndexWritesBeforeRecreate,
          readerWarmer);
  }
  /// <summary>
  /// Moves an index that lives in a RAMDirectory to a physical directory when a write to disk
  /// was forced, the in-memory size reached the configured maximum, or the index is up to date
  /// enough. Does nothing when running in memory, during startup, or when the writer is not
  /// backed by a RAMDirectory.
  /// </summary>
  private void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
  {
      var skipPersisting = context.Configuration.RunInMemory ||
                           context.IndexDefinitionStorage == null; // may happen during index startup
      if (skipPersisting)
      {
          return;
      }

      var ramDirectory = indexWriter.Directory as RAMDirectory;
      if (ramDirectory == null)
      {
          return;
      }

      var upToDateEnough = IsUpToDateEnoughToWriteToDisk(highestETag);
      var reachedSizeLimit = ramDirectory.SizeInBytes() >= context.Configuration.NewIndexInMemoryMaxBytes;

      var shouldPersist = forceWriteToDisk || reachedSizeLimit || upToDateEnough;
      if (shouldPersist == false)
      {
          return;
      }

      indexWriter.Commit(highestETag);
      var physicalDirectory = context.IndexStorage.MakeRAMDirectoryPhysical(ramDirectory, indexDefinition);
      IndexStorage.WriteIndexVersion(physicalDirectory, indexDefinition);
      directory = physicalDirectory;

      // the old writer and the RAM directory are released before a writer over the new directory is created
      indexWriter.Dispose(true);
      ramDirectory.Dispose();

      CreateIndexWriter();
  }

  protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
  /// <summary>
  /// Builds the per-field analyzer for this index from the index definition.
  /// </summary>
  /// <param name="defaultAnalyzer">
  /// Analyzer used for fields without a specific one; replaced when the definition
  /// configures an analyzer for the all-fields key.
  /// </param>
  /// <param name="toDispose">
  /// Receives the Close action of every analyzer created or passed in here; the caller is
  /// expected to invoke them when done.
  /// </param>
  /// <param name="forQuerying">
  /// When true, analyzers whose type is marked with NotForQueryingAttribute are not registered.
  /// </param>
  /// <returns>The configured per-field analyzer wrapper.</returns>
  public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
  {
      toDispose.Add(defaultAnalyzer.Close);

      string allFieldsAnalyzerName;
      if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out allFieldsAnalyzerName))
      {
          defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, allFieldsAnalyzerName);
          toDispose.Add(defaultAnalyzer.Close);
      }

      var wrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);

      foreach (var configured in indexDefinition.Analyzers)
      {
          Analyzer configuredAnalyzer = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
          toDispose.Add(configuredAnalyzer.Close);

          var excludedFromQuerying =
              forQuerying &&
              configuredAnalyzer.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0;

          if (excludedFromQuerying == false)
          {
              wrapper.AddAnalyzer(configured.Key, configuredAnalyzer);
          }
      }

      // created lazily and shared between all fields that need them
      StandardAnalyzer sharedStandardAnalyzer = null;
      KeywordAnalyzer sharedKeywordAnalyzer = null;

      foreach (var fieldIndexing in indexDefinition.Indexes)
      {
          if (fieldIndexing.Value == FieldIndexing.NotAnalyzed)
          {
              if (sharedKeywordAnalyzer == null)
              {
                  sharedKeywordAnalyzer = new KeywordAnalyzer();
                  toDispose.Add(sharedKeywordAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, sharedKeywordAnalyzer);
          }
          else if (fieldIndexing.Value == FieldIndexing.Analyzed)
          {
              // an explicitly configured analyzer takes precedence over the standard one
              if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
              {
                  continue;
              }

              if (sharedStandardAnalyzer == null)
              {
                  sharedStandardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
                  toDispose.Add(sharedStandardAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, sharedStandardAnalyzer);
          }
      }

      return wrapper;
  }
  /// <summary>
  /// Convenience overload for callers that do not need the error callback.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats)
  {
      Action<Exception, object> unusedErrorCallback;
      var results = RobustEnumerationIndex(input, funcs, stats, out unusedErrorCallback);
      return results;
  }
  /// <summary>
  /// Runs the map functions over <paramref name="input"/> through a RobustEnumerator that
  /// counts attempts on <paramref name="stats"/> and reports failures through
  /// <paramref name="onErrorFunc"/>.
  /// </summary>
  /// <param name="input">The items to index.</param>
  /// <param name="funcs">The indexing functions to apply.</param>
  /// <param name="stats">Receives the attempt and error counts.</param>
  /// <param name="onErrorFunc">
  /// The callback that records an error on the work context, logs it and increments the
  /// error count; handed back so the caller can report errors the same way.
  /// </param>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats,out Action<Exception,object> onErrorFunc)
  {
      onErrorFunc = (exception, o) =>
      {
          // prefer the document id carried by an invalid spatial shape error, if there is one
          var spatialError = exception as InvalidSpatialShapeException;
          string docId = spatialError != null ? spatialError.InvalidDocumentId : null;

          context.AddError(indexId, indexDefinition.Name, docId ?? TryGetDocKey(o), exception.Message, "Map");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", indexId, TryGetDocKey(o));
          logIndexing.WarnException(message, exception);

          stats.IndexingErrors++;
      };

      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch);
      robustEnumerator.BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts);
      robustEnumerator.CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts);
      robustEnumerator.OnError = onErrorFunc;

      return robustEnumerator.RobustEnumeration(input, funcs);
  }
  /// <summary>
  /// Runs the reduce function over <paramref name="input"/> through a RobustEnumerator that
  /// counts attempts and errors on <paramref name="stats"/>. Errors are recorded on the work
  /// context under the "Reduce" component and logged.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func,
      IStorageActionsAccessor actions,
      IndexingWorkStats stats)
  {
      Action<Exception, object> reportReduceError = (exception, o) =>
      {
          context.AddError(indexId, indexDefinition.Name, TryGetDocKey(o), exception.Message, "Reduce");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", indexId, TryGetDocKey(o));
          logIndexing.WarnException(message, exception);

          stats.ReduceErrors++;
      };

      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch);
      robustEnumerator.BeforeMoveNext = () => Interlocked.Increment(ref stats.ReduceAttempts);
      robustEnumerator.CancelMoveNext = () => Interlocked.Decrement(ref stats.ReduceAttempts);
      robustEnumerator.OnError = reportReduceError;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  // we don't care about tracking map/reduce stats here, since it is merely
  // an optimization step
  /// <summary>
  /// Runs the reduce function over <paramref name="input"/> without counting attempts or
  /// errors. Errors are still recorded on the work context under "Reduce" and logged.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func)
  {
      Action<Exception, object> reportReduceError = (exception, o) =>
      {
          context.AddError(indexId, indexDefinition.Name, TryGetDocKey(o), exception.Message, "Reduce");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", indexId, TryGetDocKey(o));
          logIndexing.WarnException(message, exception);
      };

      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToProcessInSingleBatch);
      robustEnumerator.BeforeMoveNext = () => { }; // don't care
      robustEnumerator.CancelMoveNext = () => { }; // don't care
      robustEnumerator.OnError = reportReduceError;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Extracts the document id, or failing that the reduce key, from a dynamic JSON object.
  /// </summary>
  /// <returns>
  /// The key as a string, or null when <paramref name="current"/> is not a DynamicJsonObject
  /// or carries neither value.
  /// </returns>
  public static string TryGetDocKey(object current)
  {
      var json = current as DynamicJsonObject;
      if (json == null)
      {
          return null;
      }

      object key = json.GetValue(Constants.DocumentIdFieldName);
      if (key == null)
      {
          key = json.GetValue(Constants.ReduceKeyFieldName);
      }

      return key != null ? key.ToString() : null;
  }
  public abstract void Remove(string[] keys, WorkContext context);

  /// <summary>
  /// Returns the current state holder of the searcher holder.
  /// </summary>
  internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
  {
      var holder = currentIndexSearcherHolder;
      return holder.GetCurrentStateHolder();
  }

  /// <summary>
  /// Hands out the current searcher; dispose the returned handle when done with it.
  /// </summary>
  internal IDisposable GetSearcher(out IndexSearcher searcher)
  {
      var holder = currentIndexSearcherHolder;
      return holder.GetSearcher(out searcher);
  }

  /// <summary>
  /// Hands out the current searcher together with its term documents; dispose the returned
  /// handle when done with them.
  /// </summary>
  internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
  {
      var holder = currentIndexSearcherHolder;
      return holder.GetSearcherAndTermDocs(out searcher, out termsDocs);
  }
  /// <summary>
  /// Installs a fresh searcher: over the writer's reader when a writer exists, otherwise a
  /// read-only searcher directly over the directory.
  /// </summary>
  private void RecreateSearcher()
  {
      IndexSearcher freshSearcher;
      if (indexWriter != null)
      {
          freshSearcher = new IndexSearcher(indexWriter.GetReader());
      }
      else
      {
          freshSearcher = new IndexSearcher(directory, true);
      }

      currentIndexSearcherHolder.SetIndexSearcher(freshSearcher, wait: false);
  }
  /// <summary>
  /// Adds a document to the index writer, letting every analyzer generator substitute the
  /// analyzer used for it. Analyzers created along the way are closed here; the
  /// <paramref name="analyzer"/> passed in is never closed by this method.
  /// </summary>
  protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
  {
      Analyzer newAnalyzer = analyzer;
      foreach (var generator in AnalyzerGenerators)
      {
          Analyzer generated = generator.Value.GenerateAnalyzerForIndexing(indexId.ToString(), luceneDoc, newAnalyzer);

          // close an intermediate analyzer once a generator replaced it, but never the caller's
          if (generated != newAnalyzer && newAnalyzer != analyzer)
          {
              newAnalyzer.Close();
          }

          newAnalyzer = generated;
      }

      try
      {
          // extensions receive a clone of each indexed document at the end of the write
          if (indexExtensions.Count > 0)
          {
              currentlyIndexDocuments.Add(CloneDocument(luceneDoc));
          }

          currentIndexWriter.AddDocument(luceneDoc, newAnalyzer);

          // dispose all the readers
          foreach (var fieldable in luceneDoc.GetFields())
          {
              var reader = fieldable.ReaderValue;
              if (reader != null)
              {
                  reader.Dispose();
              }
          }
      }
      finally
      {
          if (newAnalyzer != analyzer)
          {
              newAnalyzer.Close();
          }
      }
  }
  /// <summary>
  /// Records the current UTC time as the last query time.
  /// </summary>
  public void MarkQueried()
  {
      MarkQueried(SystemTime.UtcNow);
  }

  /// <summary>
  /// Records the given time as the last query time.
  /// </summary>
  public void MarkQueried(DateTime time)
  {
      lastQueryTime = time;
  }
  /// <summary>
  /// Returns the extension registered under the given key, or null when there is none.
  /// </summary>
  public IIndexExtension GetExtension(string indexExtensionKey)
  {
      IIndexExtension extension;
      return indexExtensions.TryGetValue(indexExtensionKey, out extension)
          ? extension
          : null;
  }
  /// <summary>
  /// Returns the first registered extension whose key starts with the given prefix, or null
  /// when no key matches.
  /// </summary>
  /// <param name="indexExtensionKeyPrefix">The key prefix to look for.</param>
  public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
  {
      // keys are identifiers, so match the prefix ordinally rather than with the rules of
      // whatever culture the calling thread happens to run under
      return indexExtensions.FirstOrDefault(x => x.Key.StartsWith(indexExtensionKeyPrefix, StringComparison.Ordinal)).Value;
  }
  /// <summary>
  /// Registers an extension under the given key. An extension already registered under that
  /// key is kept and the new one is ignored.
  /// </summary>
  public void SetExtension(string indexExtensionKey, IIndexExtension extension)
  {
      // TryAdd never replaces an existing entry; its result is deliberately not inspected
      indexExtensions.TryAdd(indexExtensionKey, extension);
  }
  /// <summary>
  /// Creates a copy of a Lucene document containing its numeric, binary and string fields.
  /// Fields that have neither a binary nor a string value are left out of the copy.
  /// </summary>
  private static Document CloneDocument(Document luceneDoc)
  {
      var copy = new Document();

      foreach (AbstractField field in luceneDoc.GetFields())
      {
          var numericField = field as NumericField;
          if (numericField != null)
          {
              var numericCopy = new NumericField(
                  numericField.Name,
                  numericField.IsStored ? Field.Store.YES : Field.Store.NO,
                  numericField.IsIndexed);

              var number = numericField.NumericValue;
              if (number is int)
              {
                  numericCopy.SetIntValue((int)number);
              }
              else if (number is long)
              {
                  numericCopy.SetLongValue((long)number);
              }
              else if (number is double)
              {
                  numericCopy.SetDoubleValue((double)number);
              }
              else if (number is float)
              {
                  numericCopy.SetFloatValue((float)number);
              }

              copy.Add(numericCopy);
              continue;
          }

          if (field.IsBinary)
          {
              copy.Add(new Field(field.Name, field.GetBinaryValue(),
                  field.IsStored ? Field.Store.YES : Field.Store.NO));
              continue;
          }

          if (field.StringValue == null)
          {
              //probably token stream, and we can't handle fields with token streams, so we skip this.
              continue;
          }

          copy.Add(new Field(field.Name, field.StringValue,
              field.IsStored ? Field.Store.YES : Field.Store.NO,
              field.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS,
              field.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO));
      }

      return copy;
  }
  /// <summary>
  /// When debug logging is enabled, logs one line per field of the indexed document: its
  /// name, indexed (I) / stored (S) flags and value. Binary values are shown as "&lt;binary&gt;".
  /// </summary>
  protected void LogIndexedDocument(string key, Document luceneDoc)
  {
      if (logIndexing.IsDebugEnabled == false)
      {
          return;
      }

      var description = new StringBuilder();
      foreach (IFieldable field in luceneDoc.GetFields())
      {
          var name = field.Name;
          var value = field.IsBinary ? "<binary>" : field.StringValue;
          var indexedFlag = field.IsIndexed ? "I" : "-";
          var storedFlag = field.IsStored ? "S" : "-";

          description.Append("\t").Append(name)
              .Append(" ")
              .Append(indexedFlag)
              .Append(storedFlag)
              .Append(": ")
              .Append(value)
              .AppendLine();
      }

      logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, indexId,
          description.ToString());
  }
  /// <summary>
  /// Verifies that every field the query filters or sorts on is known to the view generator.
  /// </summary>
  /// <remarks>
  /// A trailing "_Range" is stripped from a field name before it is checked. Nothing is
  /// rejected when the view generator contains the catch-all field "_". Sorting on the
  /// temporary score value, on the distance field, or on a field starting with the random
  /// field name is always allowed.
  /// </remarks>
  /// <param name="indexQuery">The query whose text and sorted fields are inspected.</param>
  /// <param name="viewGenerator">The view generator that knows the indexed fields.</param>
  /// <exception cref="ArgumentException">A queried or sorted field is not indexed.</exception>
  public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
  {
      if (string.IsNullOrWhiteSpace(indexQuery.Query) == false)
      {
          HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery);
          foreach (string field in hashSet)
          {
              string f = field;
              // the suffix is a fixed marker, so it is matched ordinally (culture independent)
              if (f.EndsWith("_Range", StringComparison.Ordinal))
              {
                  f = f.Substring(0, f.Length - "_Range".Length);
              }
              if (viewGenerator.ContainsField(f) == false &&
                  viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
                  throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed");
          }
      }

      if (indexQuery.SortedFields != null)
      {
          foreach (SortedField field in indexQuery.SortedFields)
          {
              string f = field.Field;
              if (f == Constants.TemporaryScoreValue)
                  continue;
              if (f.EndsWith("_Range", StringComparison.Ordinal))
              {
                  f = f.Substring(0, f.Length - "_Range".Length);
              }
              if (f.StartsWith(Constants.RandomFieldName, StringComparison.Ordinal))
                  continue;
              if (viewGenerator.ContainsField(f) == false && f != Constants.DistanceFieldName
                  && viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
                  throw new ArgumentException("The field '" + f + "' is not indexed, cannot sort on fields that are not indexed");
          }
      }
  }
  837. #region Nested type: IndexQueryOperation
  838. internal class IndexQueryOperation
  839. {
  private FastVectorHighlighter highlighter;
  private FieldQuery fieldQuery;

  private readonly Index parent;
  private readonly IndexQuery indexQuery;
  private readonly Func<IndexQueryResult, bool> shouldIncludeInResults;
  private readonly FieldsToFetch fieldsToFetch;
  private readonly OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers;
  private readonly List<string> reduceKeys;

  // only created for distinct queries; stays null otherwise
  private readonly HashSet<RavenJObject> alreadyReturned;
  private readonly HashSet<string> documentsAlreadySeenInPreviousPage = new HashSet<string>();

  /// <summary>
  /// Captures everything needed to run one query against <paramref name="parent"/>.
  /// </summary>
  /// <param name="parent">The index the query runs against.</param>
  /// <param name="indexQuery">The query to execute.</param>
  /// <param name="shouldIncludeInResults">Predicate stored for filtering the results.</param>
  /// <param name="fieldsToFetch">The fields requested by the query.</param>
  /// <param name="indexQueryTriggers">The query triggers stored for this operation.</param>
  /// <param name="reduceKeys">Optional reduce keys; IndexEntries only yields matching entries when set.</param>
  public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers, List<string> reduceKeys = null)
  {
      this.parent = parent;
      this.indexQuery = indexQuery;
      this.shouldIncludeInResults = shouldIncludeInResults;
      this.fieldsToFetch = fieldsToFetch;
      this.indexQueryTriggers = indexQueryTriggers;
      this.reduceKeys = reduceKeys;

      if (fieldsToFetch.IsDistinctQuery)
      {
          alreadyReturned = new HashSet<RavenJObject>(new RavenJTokenEqualityComparer());
      }
  }
  /// <summary>
  /// Lazily yields clones of the raw index entries that match the query, starting at
  /// <c>indexQuery.Start</c>. Every property whose name ends with "_Range" is stripped from the clone.
  /// When reduce keys were supplied, only entries whose reduce key field equals one of them are returned.
  /// </summary>
  /// <param name="totalResults">Receives the total hit count reported by the search.</param>
  public IEnumerable<RavenJObject> IndexEntries(Reference<int> totalResults)
  {
      parent.MarkQueried();
      using (IndexStorage.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
          IndexSearcher searcher;
          RavenJObject[] entriesByLuceneId;
          using (parent.GetSearcherAndTermsDocs(out searcher, out entriesByLuceneId))
          {
              var luceneQuery = GetDocumentQuery();
              TopDocs hits = ExecuteQuery(searcher, luceneQuery, indexQuery.Start, indexQuery.PageSize, indexQuery);
              totalResults.Value = hits.TotalHits;

              for (int position = indexQuery.Start; position < hits.ScoreDocs.Length; position++)
              {
                  var hit = hits.ScoreDocs[position];
                  // work on a clone so the shared terms-docs array is never mutated
                  var entry = (RavenJObject)entriesByLuceneId[hit.Doc].CloneToken();

                  var rangeKeys = entry
                      .Where(property => property.Key.EndsWith("_Range"))
                      .Select(property => property.Key)
                      .ToArray();
                  foreach (var rangeKey in rangeKeys)
                  {
                      entry.Remove(rangeKey);
                  }

                  if (reduceKeys != null)
                  {
                      RavenJToken reduceKeyValue;
                      bool isRequested = entry.TryGetValue(Constants.ReduceKeyFieldName, out reduceKeyValue)
                                         && reduceKeys.Any(key => reduceKeyValue.Equals(new RavenJValue(key)));
                      if (isRequested == false)
                          continue;
                  }

                  yield return entry;
              }
          }
      }
  }
  /// <summary>
  /// Runs the query and lazily yields at most <c>indexQuery.PageSize</c> results. Whenever candidates
  /// are filtered out, the search is repeated over a larger window until the page is full or every
  /// hit has been read.
  /// </summary>
  /// <param name="token">Checked before each Lucene search.</param>
  /// <returns>
  /// A deferred sequence. As a side effect of enumeration, <c>indexQuery.TotalSize</c> is set and
  /// <c>indexQuery.SkippedResults</c> is incremented for every filtered result.
  /// </returns>
  /// <exception cref="IndexDisabledException">The index priority has the Error flag (raised on first enumeration).</exception>
  public IEnumerable<IndexQueryResult> Query(CancellationToken token)
  {
      if (parent.Priority.HasFlag(IndexingPriority.Error))
          throw new IndexDisabledException("The index has been disabled due to errors");

      parent.MarkQueried();
      using (IndexStorage.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
          IndexSearcher searcher;
          using (parent.GetSearcher(out searcher))
          {
              var luceneQuery = GetDocumentQuery();

              int start = indexQuery.Start;
              int pageSize = indexQuery.PageSize;
              int yielded = 0;
              int filteredInThisPass = 0;
              bool allHitsRead;
              bool mayAdjustStart = true;

              DuplicateDocumentRecorder recorder = null;
              if (indexQuery.SkipDuplicateChecking == false)
              {
                  recorder = new DuplicateDocumentRecorder(
                      searcher,
                      parent,
                      documentsAlreadySeenInPreviousPage,
                      alreadyReturned,
                      fieldsToFetch,
                      parent.IsMapReduce || fieldsToFetch.IsProjection);
              }

              do
              {
                  if (filteredInThisPass > 0)
                  {
                      // continue right after the window we just scanned ("undo" the start adjustment) ...
                      start = start + pageSize - (start - indexQuery.Start);
                      // ... and guess how much more has to be read to collect enough unique documents
                      pageSize = Math.Max(2, filteredInThisPass) * pageSize;
                      filteredInThisPass = 0;
                  }

                  TopDocs hits;
                  int extraRequired = 0;
                  do
                  {
                      token.ThrowIfCancellationRequested();
                      hits = ExecuteQuery(searcher, luceneQuery, start, pageSize, indexQuery);
                      if (recorder != null)
                      {
                          extraRequired = recorder.RecordResultsAlreadySeenForDistinctQuery(hits, mayAdjustStart, pageSize, ref start);
                          pageSize += extraRequired * 2;
                      }
                  } while (extraRequired > 0);

                  indexQuery.TotalSize.Value = hits.TotalHits;
                  mayAdjustStart = false;
                  SetupHighlighter(luceneQuery);

                  for (var position = start; (position - start) < pageSize && position < hits.ScoreDocs.Length; position++)
                  {
                      var hit = hits.ScoreDocs[position];
                      var luceneDocument = searcher.Doc(hit.Doc);
                      var result = parent.RetrieveDocument(luceneDocument, fieldsToFetch, hit);

                      if (ShouldIncludeInResults(result) == false)
                      {
                          indexQuery.SkippedResults.Value++;
                          filteredInThisPass++;
                          continue;
                      }

                      AddHighlighterResults(searcher, hit, result);
                      AddQueryExplanation(luceneQuery, searcher, hit, result);

                      yielded++;
                      yield return result;
                      if (yielded == indexQuery.PageSize)
                          yield break;
                  }

                  allHitsRead = hits.TotalHits == hits.ScoreDocs.Length;
              } while (yielded < indexQuery.PageSize && allHitsRead == false);
          }
      }
  }
  /// <summary>
  /// Attaches highlighted fragments to <paramref name="indexQueryResult"/>. Does nothing when no
  /// highlighter was set up. For projections and map/reduce indexes the fragments are written into
  /// the projection under each field's <c>FragmentsField</c>; otherwise they are exposed through the
  /// result's highlightings dictionary keyed by field name.
  /// </summary>
  private void AddHighlighterResults(IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
  {
      if (highlighter == null)
          return;

      // deferred: fragments are only computed when the sequence is enumerated below
      var nonEmptyHighlights = indexQuery.HighlightedFields
          .Select(requested => new
          {
              requested.Field,
              requested.FragmentsField,
              Fragments = highlighter.GetBestFragments(
                  fieldQuery,
                  indexSearcher.IndexReader,
                  scoreDoc.Doc,
                  requested.Field,
                  requested.FragmentLength,
                  requested.FragmentCount)
          })
          .Where(h => h.Fragments != null && h.Fragments.Length > 0);

      if (fieldsToFetch.IsProjection == false && parent.IsMapReduce == false)
      {
          indexQueryResult.Highligtings = nonEmptyHighlights.ToDictionary(h => h.Field, h => h.Fragments);
          return;
      }

      foreach (var highlight in nonEmptyHighlights)
      {
          if (string.IsNullOrEmpty(highlight.FragmentsField))
              continue;

          indexQueryResult.Projection[highlight.FragmentsField] = new RavenJArray(highlight.Fragments);
      }
  }
  /// <summary>
  /// Creates the highlighter and its field query when the query asks for highlighted fields.
  /// Custom pre/post tags from the query are used when present and non-empty, otherwise the
  /// colored default tags of <c>BaseFragmentsBuilder</c>.
  /// </summary>
  private void SetupHighlighter(Query documentQuery)
  {
      var requestedFields = indexQuery.HighlightedFields;
      if (requestedFields == null || requestedFields.Length <= 0)
          return;

      var preTags = indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
          ? indexQuery.HighlighterPreTags
          : BaseFragmentsBuilder.COLORED_PRE_TAGS;

      var postTags = indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
          ? indexQuery.HighlighterPostTags
          : BaseFragmentsBuilder.COLORED_POST_TAGS;

      highlighter = new FastVectorHighlighter(
          FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
          FastVectorHighlighter.DEFAULT_FIELD_MATCH,
          new SimpleFragListBuilder(),
          new SimpleFragmentsBuilder(preTags, postTags));

      fieldQuery = highlighter.GetFieldQuery(documentQuery);
  }
  /// <summary>
  /// When the query requests score explanations, stores the searcher's explanation for this hit
  /// (as text) on the result.
  /// </summary>
  private void AddQueryExplanation(Query documentQuery, IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
  {
      if (indexQuery.ExplainScores == false)
      {
          return;
      }

      indexQueryResult.ScoreExplanation = indexSearcher.Explain(documentQuery, scoreDoc.Doc).ToString();
  }
  /// <summary>
  /// Passes the query through every registered index query trigger in order; each trigger receives
  /// the output of the previous one.
  /// </summary>
  /// <returns>The query produced by the last trigger, or the input when there are no triggers.</returns>
  private Query ApplyIndexTriggers(Query documentQuery)
  {
      var processed = documentQuery;
      foreach (var trigger in indexQueryTriggers)
      {
          processed = trigger.Value.ProcessQuery(parent.indexId.ToString(), processed, indexQuery);
      }
      return processed;
  }
  /// <summary>
  /// Executes an INTERSECT query: the query text is split on <c>Constants.IntersectSeparator</c>,
  /// the first clause is run as a regular (sorted/paged) search and the remaining clauses are fed
  /// through an <c>IntersectionCollector</c>. Only documents matched by every clause are yielded.
  /// </summary>
  /// <param name="token">Checked at the start of every widening pass.</param>
  /// <returns>
  /// A deferred sequence of at most <c>indexQuery.PageSize</c> results. <c>indexQuery.TotalSize</c> is
  /// set to the hit count of the first clause (before intersection) and
  /// <c>indexQuery.SkippedResults</c> is updated while enumerating.
  /// </returns>
  /// <exception cref="InvalidOperationException">The query contains fewer than two intersect clauses.</exception>
  public IEnumerable<IndexQueryResult> IntersectionQuery(CancellationToken token)
  {
      using (IndexStorage.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
          IndexSearcher indexSearcher;
          using (parent.GetSearcher(out indexSearcher))
          {
              var subQueries = indexQuery.Query.Split(new[] { Constants.IntersectSeparator }, StringSplitOptions.RemoveEmptyEntries);
              if (subQueries.Length <= 1)
                  throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");

              //Not sure how to select the page size here??? The problem is that only docs in this search can be part
              //of the final result because we're doing an intersection query (but we might exclude some of them)
              int pageSizeBestGuess = (indexQuery.Start + indexQuery.PageSize) * 2;
              int intersectMatches = 0, skippedResultsInCurrentLoop = 0;
              int previousBaseQueryMatches = 0, currentBaseQueryMatches = 0;

              var firstSubDocumentQuery = GetDocumentQuery(subQueries[0], indexQuery);

              //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
              var search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
              currentBaseQueryMatches = search.ScoreDocs.Length;
              var intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);

              do
              {
                  token.ThrowIfCancellationRequested();
                  if (skippedResultsInCurrentLoop > 0)
                  {
                      // We get here because our first attempt didn't get enough docs (after INTERSECTION was calculated)
                      pageSizeBestGuess = pageSizeBestGuess * 2;

                      search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
                      previousBaseQueryMatches = currentBaseQueryMatches;
                      currentBaseQueryMatches = search.ScoreDocs.Length;
                      intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
                  }

                  for (int i = 1; i < subQueries.Length; i++)
                  {
                      var luceneSubQuery = GetDocumentQuery(subQueries[i], indexQuery);
                      indexSearcher.Search(luceneSubQuery, null, intersectionCollector);
                  }

                  var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
                  intersectMatches = currentIntersectResults.Count;
                  skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
              } while (intersectMatches < indexQuery.PageSize && //stop if we've got enough results to satisfy the pageSize
                       currentBaseQueryMatches < search.TotalHits && //stop if increasing the page size wouldn't make any difference
                       previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results

              var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();

              //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
              //because it's before the INTERSECTION has been applied, so only some of those results make it out.
              //Trying to give an accurate answer is going to be too costly, so we aren't going to try.
              indexQuery.TotalSize.Value = search.TotalHits;
              indexQuery.SkippedResults.Value = skippedResultsInCurrentLoop;

              // The intersect results are a filtered view of the base search, so position i in
              // intersectResults does not correspond to position i in search.ScoreDocs. Index the
              // base hits by Lucene doc id so every document is paired with its own ScoreDoc.
              var scoreDocsByLuceneId = new Dictionary<int, ScoreDoc>(search.ScoreDocs.Length);
              foreach (var baseScoreDoc in search.ScoreDocs)
              {
                  scoreDocsByLuceneId[baseScoreDoc.Doc] = baseScoreDoc;
              }

              //Using the final set of results in the intersectionCollector
              int returnedResults = 0;
              for (int i = indexQuery.Start; i < intersectResults.Count && (i - indexQuery.Start) < pageSizeBestGuess; i++)
              {
                  var luceneId = intersectResults[i].LuceneId;
                  Document document = indexSearcher.Doc(luceneId);

                  ScoreDoc scoreDoc;
                  if (scoreDocsByLuceneId.TryGetValue(luceneId, out scoreDoc) == false)
                  {
                      // not expected (the collector was seeded from search.ScoreDocs); fall back to the
                      // previous positional pairing rather than failing the query
                      scoreDoc = search.ScoreDocs[i];
                  }

                  IndexQueryResult indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc);
                  if (ShouldIncludeInResults(indexQueryResult) == false)
                  {
                      indexQuery.SkippedResults.Value++;
                      skippedResultsInCurrentLoop++;
                      continue;
                  }

                  returnedResults++;
                  yield return indexQueryResult;
                  if (returnedResults == indexQuery.PageSize)
                      yield break;
              }
          }
      }
  }
  /// <summary>
  /// Decides whether a candidate result is returned to the caller. The checks run in this order:
  /// the caller supplied filter, the "already seen in a previous page" set, and - for distinct
  /// queries only - whether the projection was returned before (which also records it).
  /// </summary>
  private bool ShouldIncludeInResults(IndexQueryResult indexQueryResult)
  {
      bool acceptedByCaller = shouldIncludeInResults(indexQueryResult);
      if (acceptedByCaller == false)
          return false;

      bool seenOnEarlierPage = documentsAlreadySeenInPreviousPage.Contains(indexQueryResult.Key);
      if (seenOnEarlierPage)
          return false;

      if (fieldsToFetch.IsDistinctQuery == false)
          return true;

      // Add returns false when an equal projection is already in the set
      return alreadyReturned.Add(indexQueryResult.Projection);
  }
  /// <summary>
  /// Records what the previous page(s) contained so duplicates can be filtered from the current one:
  /// the document ids of the immediately preceding page (plain map indexes without projection only)
  /// and, for distinct queries, the projections of every hit before <paramref name="start"/>.
  /// NOTE(review): nothing inside this class calls this overload - the query path uses
  /// DuplicateDocumentRecorder instead; confirm whether it can be removed.
  /// </summary>
  private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start, int pageSize)
  {
      var upperBound = Math.Min(start, search.TotalHits);
      var previousPageStart = start - pageSize;

      // we are paging, we need to check that we don't have duplicates in the previous page
      // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
      if (parent.IsMapReduce == false && fieldsToFetch.IsProjection == false && previousPageStart >= 0 && start < search.TotalHits)
      {
          for (int position = previousPageStart; position < upperBound; position++)
          {
              var previousDocument = indexSearcher.Doc(search.ScoreDocs[position].Doc);
              documentsAlreadySeenInPreviousPage.Add(previousDocument.Get(Constants.DocumentIdFieldName));
          }
      }

      if (fieldsToFetch.IsDistinctQuery)
      {
          // add results that were already there in previous pages
          for (int position = 0; position < upperBound; position++)
          {
              var hit = search.ScoreDocs[position];
              Document earlierDocument = indexSearcher.Doc(hit.Doc);
              var earlierResult = parent.RetrieveDocument(earlierDocument, fieldsToFetch, hit);
              alreadyReturned.Add(earlierResult.Projection);
          }
      }
  }
  /// <summary>
  /// Builds the Lucene query for the whole index query. For a <c>SpatialIndexQuery</c> the spatial
  /// criteria are added: they replace a match-all query, or are AND-ed (MUST + MUST) with any other query.
  /// </summary>
  public Query GetDocumentQuery()
  {
      var textQuery = GetDocumentQuery(indexQuery.Query, indexQuery);

      var spatialQuery = indexQuery as SpatialIndexQuery;
      if (spatialQuery == null)
          return textQuery;

      var spatialField = parent.viewGenerator.GetSpatialField(spatialQuery.SpatialFieldName);
      var spatialCriteria = spatialField.MakeQuery(textQuery, spatialField.GetStrategy(), spatialQuery);
      if (textQuery is MatchAllDocsQuery)
          return spatialCriteria;

      var combined = new BooleanQuery();
      combined.Add(textQuery, Occur.MUST);
      combined.Add(spatialCriteria, Occur.MUST);
      return combined;
  }
  /// <summary>
  /// Parses <paramref name="query"/> into a Lucene query and runs it through the index query triggers.
  /// A null or empty query becomes a match-all query. Otherwise a per-field search analyzer is
  /// created, offered to every analyzer generator, used to build the query and then disposed.
  /// </summary>
  private Query GetDocumentQuery(string query, IndexQuery indexQuery)
  {
      if (String.IsNullOrEmpty(query))
      {
          logQuerying.Debug("Issuing query on index {0} for all documents", parent.indexId);
          return ApplyIndexTriggers(new MatchAllDocsQuery());
      }

      logQuerying.Debug("Issuing query on index {0} for: {1}", parent.indexId, query);

      Query parsedQuery;
      var cleanupActions = new List<Action>();
      RavenPerFieldAnalyzerWrapper searchAnalyzer = null;
      try
      {
          searchAnalyzer = parent.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), cleanupActions, true);
          searchAnalyzer = parent.AnalyzerGenerators.Aggregate(searchAnalyzer, (current, generator) =>
          {
              Analyzer replacement = generator.GenerateAnalyzerForQuerying(parent.indexId.ToString(), indexQuery.Query, current);
              if (replacement != current)
              {
                  // the generator supplied its own analyzer: release the one it replaces
                  DisposeAnalyzerAndFriends(cleanupActions, current);
              }
              return parent.CreateAnalyzer(replacement, cleanupActions, true);
          });

          parsedQuery = QueryBuilder.BuildQuery(query, indexQuery, searchAnalyzer);
      }
      finally
      {
          DisposeAnalyzerAndFriends(cleanupActions, searchAnalyzer);
      }

      return ApplyIndexTriggers(parsedQuery);
  }
  /// <summary>
  /// Closes <paramref name="analyzer"/> (when not null), invokes every pending cleanup action and
  /// then empties the list so the actions cannot run twice.
  /// </summary>
  private static void DisposeAnalyzerAndFriends(List<Action> toDispose, RavenPerFieldAnalyzerWrapper analyzer)
  {
      if (analyzer != null)
      {
          analyzer.Close();
      }

      toDispose.ForEach(cleanup => cleanup());
      toDispose.Clear();
  }
  /// <summary>
  /// Runs <paramref name="documentQuery"/> against the searcher and returns the top
  /// <c>start + pageSize</c> hits (at least one), sorted when the query defines a sort.
  /// When every document is requested (<c>pageSize == Int32.MaxValue</c>) and no sort applies,
  /// all hits are gathered through a <c>GatherAllCollector</c> instead.
  /// </summary>
  /// <param name="indexSearcher">Searcher to execute against.</param>
  /// <param name="documentQuery">The Lucene query to run.</param>
  /// <param name="start">Index of the first wanted hit; hits before it are still fetched so callers can page.</param>
  /// <param name="pageSize">Number of hits wanted from <paramref name="start"/> on.</param>
  /// <param name="indexQuery">Supplies the sort definition.</param>
  private TopDocs ExecuteQuery(IndexSearcher indexSearcher, Query documentQuery, int start, int pageSize,
      IndexQuery indexQuery)
  {
      var sort = indexQuery.GetSort(parent.indexDefinition, parent.viewGenerator);

      if (pageSize == Int32.MaxValue && sort == null) // we want all docs, no sorting required
      {
          var gatherAllCollector = new GatherAllCollector();
          indexSearcher.Search(documentQuery, gatherAllCollector);
          return gatherAllCollector.ToTopDocs();
      }

      // Need to protect against ridiculously high values of pageSize + start. The sum is computed in
      // 64 bits: in 32 bits it can wrap to exactly Int32.MinValue (e.g. Int32.MaxValue + 1), for which
      // Math.Abs(int) throws OverflowException. The result is then clamped to the [1, Int32.MaxValue] range.
      long absFullPage = Math.Abs((long)pageSize + start);
      int minPageSize = (int)Math.Max(1L, Math.Min(absFullPage, (long)Int32.MaxValue));

      // NOTE: We get Start + Pagesize results back so we have something to page on
      if (sort != null)
      {
          try
          {
              //indexSearcher.SetDefaultFieldSortScoring (sort.GetSort().Contains(SortField.FIELD_SCORE), false);
              indexSearcher.SetDefaultFieldSortScoring(true, false);
              var ret = indexSearcher.Search(documentQuery, null, minPageSize, sort);
              return ret;
          }
          finally
          {
              // always restore the searcher's default, even when the search throws
              indexSearcher.SetDefaultFieldSortScoring(false, false);
          }
      }

      return indexSearcher.Search(documentQuery, null, minPageSize);
  }
  1225. }
  1226. #endregion
  /// <summary>
  /// Tracks, across repeated executions of the same paged query, which documents and (for distinct
  /// queries) which projections already appeared before the requested start position, so the
  /// caller can skip them and widen its search window when needed.
  /// </summary>
  public class DuplicateDocumentRecorder
  {
      private readonly Searchable indexSearcher;
      private readonly Index parent;
      private readonly FieldsToFetch fieldsToFetch;
      private readonly bool isProjectionOrMapReduce;
      private readonly HashSet<string> documentsAlreadySeenInPreviousPage;
      private readonly HashSet<RavenJObject> alreadyReturned;

      // Exclusive upper bound of the positions to scan; -1 until the first call seeds it with 'start'.
      private int min = -1;
      // Resume positions, so hits are not re-scanned when the caller re-runs a wider search.
      private int alreadyScannedPositions;
      private int alreadyScannedPositionsForDistinct;

      public DuplicateDocumentRecorder(Searchable indexSearcher,
          Index parent,
          HashSet<string> documentsAlreadySeenInPreviousPage,
          HashSet<RavenJObject> alreadyReturned,
          FieldsToFetch fieldsToFetch,
          bool isProjectionOrMapReduce)
      {
          this.indexSearcher = indexSearcher;
          this.parent = parent;
          this.documentsAlreadySeenInPreviousPage = documentsAlreadySeenInPreviousPage;
          this.alreadyReturned = alreadyReturned;
          this.fieldsToFetch = fieldsToFetch;
          this.isProjectionOrMapReduce = isProjectionOrMapReduce;
      }

      /// <summary>
      /// Scans the hits located before the current scan bound and records their ids / projections.
      /// </summary>
      /// <param name="search">The hits of the most recent search.</param>
      /// <param name="adjustStart">When true, <paramref name="start"/> is advanced by the number of duplicates found.</param>
      /// <param name="pageSize">Not used by this method.</param>
      /// <param name="start">The caller's start position; seeds the scan bound on the first call.</param>
      /// <returns>
      /// The number of duplicates found, or - when <paramref name="search"/> holds fewer hits than the
      /// scan needs - how many more hits are missing (the caller is expected to search again with a
      /// larger page and call back).
      /// </returns>
      public int RecordResultsAlreadySeenForDistinctQuery(TopDocs search, bool adjustStart, int pageSize, ref int start)
      {
          int duplicatesFound = 0;
          int fetchedHits = search.ScoreDocs.Length;

          if (min == -1)
          {
              min = start;
          }
          min = Math.Min(min, search.TotalHits);

          // we are paging, we need to check that we don't have duplicates in the previous pages
          // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
          if (isProjectionOrMapReduce == false)
          {
              for (int position = alreadyScannedPositions; position < min; position++)
              {
                  if (position >= fetchedHits)
                  {
                      // ran out of fetched hits: remember where to resume and ask for more
                      alreadyScannedPositions = position;
                      return min - fetchedHits;
                  }

                  var luceneDocument = indexSearcher.Doc(search.ScoreDocs[position].Doc);
                  var documentId = luceneDocument.Get(Constants.DocumentIdFieldName);
                  if (documentsAlreadySeenInPreviousPage.Add(documentId))
                      continue;

                  // already seen this, need to expand the range we are scanning because the user
                  // didn't take this into account
                  min = Math.Min(min + 1, search.TotalHits);
                  duplicatesFound++;
              }
              alreadyScannedPositions = min;
          }

          if (fieldsToFetch.IsDistinctQuery)
          {
              // add results that were already there in previous pages
              for (int position = alreadyScannedPositionsForDistinct; position < min; position++)
              {
                  if (position >= fetchedHits)
                  {
                      alreadyScannedPositionsForDistinct = position;
                      return min - fetchedHits;
                  }

                  var hit = search.ScoreDocs[position];
                  Document luceneDocument = indexSearcher.Doc(hit.Doc);
                  var earlierResult = parent.RetrieveDocument(luceneDocument, fieldsToFetch, hit);

                  // we don't consider empty projections to be relevant for distinct operations
                  bool isDuplicate = earlierResult.Projection.Count > 0 &&
                                     alreadyReturned.Add(earlierResult.Projection) == false;
                  if (isDuplicate)
                  {
                      min++; // we found a duplicate
                      duplicatesFound++;
                  }
              }
              alreadyScannedPositionsForDistinct = min;
          }

          if (adjustStart)
          {
              start += duplicatesFound;
          }
          return duplicatesFound;
      }
  }
  /// <summary>
  /// Returns the values of <c>currentlyIndexing</c> followed by the entries of
  /// <c>indexingPerformanceStats</c>, copied into a new array.
  /// </summary>
  public IndexingPerformanceStats[] GetIndexingPerformance()
  {
      var inProgress = currentlyIndexing.Values;
      var combined = inProgress.Concat(indexingPerformanceStats);
      return combined.ToArray();
  }
  /// <summary>
  /// Returns a new array holding only the values of <c>currentlyIndexing</c>.
  /// </summary>
  public IndexingPerformanceStats[] GetCurrentIndexingPerformance()
  {
      var inProgress = currentlyIndexing.Values;
      return inProgress.ToArray();
  }
  /// <summary>
  /// Copies the index files of the current commit point into
  /// <c>{backupDirectory}[/{incrementalTag}]/Indexes/{indexId}</c>.
  /// Two manifests are maintained: "{indexId}.all-existing-index-files" (appended to; files listed
  /// there are not copied again) and "index-files.required-for-index-restore" (rewritten each time).
  /// A corrupted index or a failed file copy is logged and the index is skipped: the "required"
  /// manifest is deleted and the method returns without throwing.
  /// </summary>
  /// <param name="backupDirectory">Root folder of the backup.</param>
  /// <param name="path">Folder that contains the live index folders (one per index id).</param>
  /// <param name="incrementalTag">Optional sub folder name for an incremental backup; may be null.</param>
  public void Backup(string backupDirectory, string path, string incrementalTag)
  {
      if (directory is RAMDirectory)
      {
          //if the index is memory-only, force writing index data to disk
          Write((writer, analyzer, stats) =>
          {
              ForceWriteToDisk();
              return new IndexedItemsInfo(GetLastEtagFromStats()) { ChangedDocs = 1 };
          });
      }

      bool snapshotTaken = false;
      // cleared when the body throws, so a failing Release() cannot mask the original exception
      bool propagateReleaseFailure = true;
      try
      {
          var previouslyBackedUp = new HashSet<string>();
          if (incrementalTag != null)
              backupDirectory = Path.Combine(backupDirectory, incrementalTag);

          var allFilesPath = Path.Combine(backupDirectory, indexId + ".all-existing-index-files");
          var saveToFolder = Path.Combine(backupDirectory, "Indexes", indexId.ToString());
          System.IO.Directory.CreateDirectory(saveToFolder);

          if (File.Exists(allFilesPath))
          {
              previouslyBackedUp.UnionWith(File.ReadLines(allFilesPath));
          }

          var neededFilePath = Path.Combine(saveToFolder, "index-files.required-for-index-restore");
          using (var allFilesWriter = File.Exists(allFilesPath) ? File.AppendText(allFilesPath) : File.CreateText(allFilesPath))
          using (var neededFilesWriter = File.CreateText(neededFilePath))
          {
              try
              {
                  // this is called for the side effect of creating the snapshotter and the writer
                  // we explicitly handle the backup outside of the write, to allow concurrent indexing
                  Write((writer, analyzer, stats) =>
                  {
                      // however, we copy the current segments.gen & index.version to make
                      // sure that we get the _at the time_ of the write.
                      var pointInTimeFiles = new[] { "segments.gen", IndexStorage.IndexVersionFileName(indexDefinition) };
                      foreach (var fileName in pointInTimeFiles)
                      {
                          var source = Path.Combine(path, indexId.ToString(), fileName);
                          File.Copy(source, Path.Combine(saveToFolder, fileName));
                          allFilesWriter.WriteLine(fileName);
                          neededFilesWriter.WriteLine(fileName);
                      }
                      return new IndexedItemsInfo(null);
                  });
              }
              catch (CorruptIndexException e)
              {
                  logIndexing.WarnException(
                      "Could not backup index " + indexId +
                      " because it is corrupted. Skipping the index, will force index reset on restore", e);
                  // the manifest must be closed before it can be deleted
                  neededFilesWriter.Dispose();
                  TryDelete(neededFilePath);
                  return;
              }

              var commit = snapshotter.Snapshot();
              snapshotTaken = true;

              foreach (var fileName in commit.FileNames)
              {
                  var source = Path.Combine(path, indexId.ToString(), fileName);

                  bool isLockFile = ".lock".Equals(Path.GetExtension(source), StringComparison.InvariantCultureIgnoreCase);
                  if (isLockFile || File.Exists(source) == false)
                      continue;

                  if (previouslyBackedUp.Contains(fileName) == false)
                  {
                      var destination = Path.Combine(saveToFolder, fileName);
                      try
                      {
                          File.Copy(source, destination);
                      }
                      catch (Exception e)
                      {
                          logIndexing.WarnException(
                              "Could not backup index " + indexId +
                              " because failed to copy file : " + source + ". Skipping the index, will force index reset on restore", e);
                          neededFilesWriter.Dispose();
                          TryDelete(neededFilePath);
                          return;
                      }
                      allFilesWriter.WriteLine(fileName);
                  }

                  // required for restore whether it was copied now or by an earlier backup
                  neededFilesWriter.WriteLine(fileName);
              }

              allFilesWriter.Flush();
              neededFilesWriter.Flush();
          }
      }
      catch
      {
          propagateReleaseFailure = false;
          throw;
      }
      finally
      {
          if (snapshotter != null && snapshotTaken)
          {
              try
              {
                  snapshotter.Release();
              }
              catch
              {
                  if (propagateReleaseFailure)
                      throw;
              }
          }
      }
  }
  /// <summary>
  /// Asks the index storage of the current context for the last etag recorded for this index.
  /// </summary>
  public Etag GetLastEtagFromStats()
  {
      var indexStorage = context.IndexStorage;
      return indexStorage.GetLastEtagForIndex(this);
  }
  /// <summary>
  /// Best-effort delete of <paramref name="neededFilePath"/>: any exception raised by the delete
  /// is swallowed so the caller's cleanup path never fails because of it.
  /// </summary>
  private static void TryDelete(string neededFilePath)
  {
      try
      {
          File.Delete(neededFilePath);
      }
      catch (Exception)
      {
          // intentionally ignored - nothing useful can be done if the file cannot be removed
      }
  }
  /// <summary>
  /// Drains both queues produced during indexing: merges the per-batch "document -> referenced
  /// documents" maps and stores them through <paramref name="actions"/>, then schedules a
  /// <c>TouchReferenceDocumentIfChangedTask</c> for the referenced documents that were missing.
  /// </summary>
  /// <param name="actions">Storage accessor used to persist references and to enqueue the task.</param>
  /// <param name="allReferencedDocs">Queue of reference maps; emptied by this call.</param>
  /// <param name="missingReferencedDocs">Queue of "referenced key -> etag" maps; emptied by this call.</param>
  protected void UpdateDocumentReferences(IStorageActionsAccessor actions,
      ConcurrentQueue<IDictionary<string, HashSet<string>>> allReferencedDocs,
      ConcurrentQueue<IDictionary<string, Etag>> missingReferencedDocs)
  {
      IDictionary<string, HashSet<string>> merged = new Dictionary<string, HashSet<string>>(StringComparer.InvariantCultureIgnoreCase);
      IDictionary<string, HashSet<string>> result;
      while (allReferencedDocs.TryDequeue(out result))
      {
          foreach (var kvp in result)
          {
              HashSet<string> set;
              if (merged.TryGetValue(kvp.Key, out set))
              {
                  logIndexing.Debug("Merging references for key = {0}, references = {1}", kvp.Key, String.Join(",", set));
                  set.UnionWith(kvp.Value);
              }
              else
              {
                  merged.Add(kvp.Key, kvp.Value);
              }
          }
      }

      foreach (var referencedDocument in merged)
      {
          actions.Indexing.UpdateDocumentReferences(indexId, referencedDocument.Key, referencedDocument.Value);
          actions.General.MaybePulseTransaction();
      }

      var task = new TouchReferenceDocumentIfChangedTask
      {
          Index = indexId, // so we will get IsStale properly
          ReferencesToCheck = new Dictionary<string, Etag>(StringComparer.OrdinalIgnoreCase)
      };

      IDictionary<string, Etag> docs;
      while (missingReferencedDocs.TryDequeue(out docs))
      {
          foreach (var doc in docs)
          {
              Etag etag;
              if (task.ReferencesToCheck.TryGetValue(doc.Key, out etag) == false)
              {
                  task.ReferencesToCheck[doc.Key] = doc.Value;
                  continue;
              }
              if (etag == doc.Value)
                  continue;
              task.ReferencesToCheck[doc.Key] = Etag.InvalidEtag; // different etags, force a touch
          }
      }

      if (task.ReferencesToCheck.Count == 0)
          return;

      // Logged once, after every batch has been merged and only when something is scheduled.
      // It used to run inside the dequeue loop, re-serialising the whole accumulated dictionary per batch.
      logIndexing.Debug("Scheduled to touch documents: {0}", String.Join(";", task.ReferencesToCheck.Select(x => x.Key + ":" + x.Value)));

      actions.Tasks.AddTask(task, SystemTime.UtcNow);
  }
  /// <summary>
  /// Raises the <c>forceWriteToDisk</c> flag. This method only sets the flag; whatever reads it
  /// lives outside this method.
  /// </summary>
  public void ForceWriteToDisk()
  {
      forceWriteToDisk = true;
  }
  /// <summary>
  /// Enforces the maximum number of index outputs a single source document may produce. The limit
  /// comes from the index definition, falling back to the database configuration; -1 disables it.
  /// When the limit is exceeded the index is marked as errored (in memory and in storage), an
  /// <c>IndexMarkedAsErrored</c> notification is raised and the method throws.
  /// </summary>
  /// <param name="sourceDocumentId">Id of the document being indexed (used in the error message).</param>
  /// <param name="numberOfAlreadyProducedOutputs">Outputs produced so far for that document.</param>
  /// <exception cref="InvalidOperationException">The document produced more outputs than allowed.</exception>
  protected void EnsureValidNumberOfOutputsForDocument(string sourceDocumentId, int numberOfAlreadyProducedOutputs)
  {
      var maxNumberOfIndexOutputs = indexDefinition.MaxIndexOutputsPerDocument ?? context.Configuration.MaxIndexOutputsPerDocument;

      bool isUnlimited = maxNumberOfIndexOutputs == -1;
      if (isUnlimited || numberOfAlreadyProducedOutputs <= maxNumberOfIndexOutputs)
          return;

      Priority = IndexingPriority.Error;

      // this cannot happen in the current transaction, since we are going to throw in just a bit.
      var storage = context.Database.TransactionalStorage;
      using (storage.DisableBatchNesting())
      {
          storage.Batch(accessor =>
          {
              accessor.Indexing.SetIndexPriority(indexId, IndexingPriority.Error);
              accessor.Indexing.TouchIndexEtag(indexId);
          });
      }

      var notification = new IndexChangeNotification()
      {
          Name = PublicName,
          Type = IndexChangeTypes.IndexMarkedAsErrored
      };
      context.Database.Notifications.RaiseNotifications(notification);

      var message = string.Format(
          "Index '{0}' has already produced {1} map results for a source document '{2}', while the allowed max number of outputs is {3} per one document. " +
          "Index will be disabled. Please verify this index definition and consider a re-design of your entities.",
          PublicName, numberOfAlreadyProducedOutputs, sourceDocumentId, maxNumberOfIndexOutputs);
      throw new InvalidOperationException(message);
  }
  /// <summary>
  /// Compares <c>Index</c> instances by their <c>IndexId</c> only.
  /// </summary>
  internal class IndexByIdEqualityComparer : IEqualityComparer<Index>
  {
      /// <summary>
      /// Two indexes are equal when they are the same reference (including both null) or when both
      /// are non-null and share the same <c>IndexId</c>.
      /// </summary>
      public bool Equals(Index x, Index y)
      {
          if (ReferenceEquals(x, y))
              return true;
          // exactly one side is null here, which previously caused a NullReferenceException
          if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
              return false;

          return x.IndexId == y.IndexId;
      }

      /// <summary>
      /// Hash code derived from <c>IndexId</c>, consistent with <see cref="Equals(Index, Index)"/>.
      /// </summary>
      /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
      public int GetHashCode(Index obj)
      {
          if (ReferenceEquals(obj, null))
              throw new ArgumentNullException("obj");

          return obj.IndexId.GetHashCode();
      }
  }
  1541. }
  1542. }