PageRenderTime 83ms CodeModel.GetById 41ms RepoModel.GetById 0ms app.codeStats 1ms

/ToMigrate/Raven.Database/Indexing/Index.cs

http://github.com/ayende/ravendb
C# | 2107 lines | 1791 code | 260 blank | 56 comment | 292 complexity | ea98209eeedfce2c87d8653c618d85cd MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="Index.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections;
  8. using System.Collections.Concurrent;
  9. using System.Collections.Generic;
  10. using System.Collections.Specialized;
  11. using System.ComponentModel.Composition;
  12. using System.Diagnostics;
  13. using System.IO;
  14. using System.Linq;
  15. using System.Text;
  16. using System.Threading;
  17. using System.Threading.Tasks;
  18. using System.Web.UI;
  19. using Lucene.Net.Analysis;
  20. using Lucene.Net.Analysis.Standard;
  21. using Lucene.Net.Documents;
  22. using Lucene.Net.Index;
  23. using Lucene.Net.Search;
  24. using Lucene.Net.Search.Vectorhighlight;
  25. using Lucene.Net.Store;
  26. using Lucene.Net.Util;
  27. using Raven.Abstractions;
  28. using Raven.Abstractions.Data;
  29. using Raven.Abstractions.Exceptions;
  30. using Raven.Abstractions.Extensions;
  31. using Raven.Abstractions.Indexing;
  32. using Raven.Abstractions.Json.Linq;
  33. using Raven.Abstractions.Linq;
  34. using Raven.Abstractions.Logging;
  35. using Raven.Abstractions.MEF;
  36. using Raven.Database.Config;
  37. using Raven.Database.Config.Settings;
  38. using Raven.Database.Data;
  39. using Raven.Database.Extensions;
  40. using Raven.Database.Indexing.Analyzers;
  41. using Raven.Database.Linq;
  42. using Raven.Database.Plugins;
  43. using Raven.Database.Storage;
  44. using Raven.Database.Tasks;
  45. using Raven.Database.Util;
  46. using Raven.Json.Linq;
  47. using Constants = Raven.Abstractions.Data.Constants;
  48. using Directory = Lucene.Net.Store.Directory;
  49. using Document = Lucene.Net.Documents.Document;
  50. using Field = Lucene.Net.Documents.Field;
  51. using Version = Lucene.Net.Util.Version;
  52. namespace Raven.Database.Indexing
  53. {
  54. /// <summary>
  55. /// This is a thread safe, single instance for a particular index.
  56. /// </summary>
  57. public abstract class Index : IDisposable, ILowMemoryHandler
  58. {
  // Loggers for the indexing and querying paths; the logger names are the Index type name
  // suffixed with ".Indexing" / ".Querying".
  59. protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  60. protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");
  // NOTE(review): presumably the number of write errors tolerated before HandleWriteError
  // escalates - HandleWriteError is not visible in this part of the file, confirm there.
  61. private const long WriteErrorsLimit = 10;
  // Clones of the documents added during the current Write call; handed to the index
  // extensions and cleared at the end of Write.
  62. private readonly List<Document> currentlyIndexDocuments = new List<Document>();
  // The Lucene directory backing this index. Replaced by a file system directory when an
  // in-memory index is persisted (see WriteInMemoryIndexToDiskIfNecessary).
  63. protected Directory directory;
  64. protected readonly IndexDefinition indexDefinition;
  // Name of the operation currently holding writeLock ("Write", "Flush", "Merge / Optimize");
  // Dispose logs it when it has to wait for the lock.
  65. private volatile string waitReason;
  // Writer RAM size at or above which Write forces a Flush; read from configuration in the constructor.
  66. private readonly Size flushSize;
  67. private long writeErrors;
  68. // Users sometimes configure index outputs without realizing that we need to count on that for memory
  69. // management. That can result in very small batch sizes, so we want to make sure that we don't trust
  70. // the user configuration, and use what is actually going on
  71. private int maxActualIndexOutput = 1;
  72. public IndexingPriority Priority { get; set; }
  73. /// <summary>
  74. /// Note, this might be written to by multiple threads at the same time.
  75. /// We don't actually care for exact timing, it is more about general feeling
  76. /// </summary>
  77. private DateTime? lastQueryTime;
  // Extensions registered through SetExtension, keyed by extension key; disposed in Dispose.
  78. private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
  79. new ConcurrentDictionary<string, IIndexExtension>();
  80. internal readonly int indexId;
  /// <summary>
  /// The numeric id this index was created with.
  /// </summary>
  81. public int IndexId
  82. {
  83. get { return indexId; }
  84. }
  85. private readonly AbstractViewGenerator viewGenerator;
  86. protected readonly WorkContext context;
  // Serializes Write, Flush, MergeSegments and Dispose.
  87. private readonly object writeLock = new object();
  // Set by Dispose once it holds writeLock; checked by Write and Flush.
  88. private volatile bool disposed;
  // Created lazily by EnsureIndexWriter / CreateIndexWriter; null until first needed and after Dispose.
  89. private RavenIndexWriter indexWriter;
  90. private SnapshotDeletionPolicy snapshotter;
  // Holds the current IndexSearcher; swapped by RecreateSearcher.
  91. private readonly IndexSearcherHolder currentIndexSearcherHolder;
  // Batches that were started (RecordCurrentBatch) but not yet completed, keyed by indexing step.
  92. private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing = new ConcurrentDictionary<string, IndexingPerformanceStats>();
  // Most recent completed batches; trimmed to 25 entries by AddIndexingPerformanceStats.
  93. private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats = new ConcurrentQueue<IndexingPerformanceStats>();
  // Analyzer passed to every RavenIndexWriter created by CreateIndexWriter.
  94. private readonly static StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);
  // When true, WriteInMemoryIndexToDiskIfNecessary persists the in-memory index regardless of size, age or staleness.
  95. private bool forceWriteToDisk;
  /// <summary>
  /// Creates the index wrapper over an existing Lucene directory, opens the initial searcher
  /// and registers the instance as a low-memory handler.
  /// </summary>
  /// <param name="directory">Lucene directory that stores the index.</param>
  /// <param name="id">Numeric id of the index.</param>
  /// <param name="indexDefinition">Definition (name, analyzers, field options) of the index.</param>
  /// <param name="viewGenerator">View generator associated with the index.</param>
  /// <param name="context">Work context supplying configuration and storage.</param>
  /// <exception cref="ArgumentNullException">Any reference argument is null.</exception>
  [CLSCompliant(false)]
  protected Index(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
  {
      // Validate everything before allocating anything, so a bad argument cannot leave
      // a half-initialized searcher holder behind.
      if (directory == null) throw new ArgumentNullException("directory");
      if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
      if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");
      // context is dereferenced below (context.Configuration); fail with a clear
      // argument error instead of a NullReferenceException.
      if (context == null) throw new ArgumentNullException("context");

      currentIndexSearcherHolder = new IndexSearcherHolder(id, context);
      this.indexId = id;
      this.indexDefinition = indexDefinition;
      this.viewGenerator = viewGenerator;
      this.context = context;
      if (logIndexing.IsDebugEnabled)
          logIndexing.Debug("Creating index for {0}", PublicName);
      this.directory = directory;
      flushSize = context.Configuration.Indexing.FlushIndexToDiskSize;
      _indexCreationTime = SystemTime.UtcNow;
      RecreateSearcher();
      MemoryStatistics.RegisterLowMemoryHandler(this);
  }
  // NOTE(review): not read or written anywhere in the visible part of this file;
  // presumably maintained by the indexing executer - confirm against callers.
  115. public int CurrentNumberOfItemsToIndexInSingleBatch { get; set; }
  // Populated through MEF ([ImportMany]); consulted by AddDocumentToIndex for every document.
  116. [ImportMany]
  117. public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }
  118. /// <summary>
  119. /// Whether this is a map reduce index or not
  120. /// </summary>
  121. public abstract bool IsMapReduce { get; }
  /// <summary>
  /// Last time a query was recorded through MarkQueried, or null if none was recorded.
  /// </summary>
  122. public DateTime? LastQueryTime
  123. {
  124. get
  125. {
  126. return lastQueryTime;
  127. }
  128. }
  // Set to the current UTC time when Write starts and again when it finishes.
  129. public DateTime LastIndexTime { get; set; }
  // Value LastIndexTime had before the most recent Write call.
  130. protected DateTime PreviousIndexTime { get; set; }
  /// <summary>
  /// Human readable description of whether the index lives in memory:
  /// "true (size)" for a RAM directory, "false" otherwise or when the
  /// RAM directory has already been closed.
  /// </summary>
  public string IsOnRam
  {
      get
      {
          var inMemory = directory as RAMDirectory;
          if (inMemory != null)
          {
              try
              {
                  return "true (" + SizeHelper.Humane(inMemory.SizeInBytes()) + ")";
              }
              catch (AlreadyClosedException)
              {
                  // the directory was closed underneath us, report it as not in memory
              }
          }
          return "false";
      }
  }
  /// <summary>
  /// The index name as stored in the index definition.
  /// </summary>
  148. public string PublicName { get { return indexDefinition.Name; } }
  /// <summary>
  /// Mirrors IsTestIndex of the index definition.
  /// </summary>
  149. public bool IsTestIndex
  150. {
  151. get { return indexDefinition.IsTestIndex; }
  152. }
  /// <summary>
  /// The observed maximum number of index outputs per document, or null
  /// while the tracked value is still at its initial value of 1.
  /// </summary>
  public int? MaxIndexOutputsPerDocument
  {
      get
      {
          return maxActualIndexOutput == 1
              ? (int?)null
              : maxActualIndexOutput;
      }
  }
  // NOTE(review): not touched in the visible part of this file; presumably toggled by the
  // indexing executer while a map batch is running - confirm against callers.
  162. [CLSCompliant(false)]
  163. public volatile bool IsMapIndexingInProgress;
  // Assigned in the constructor; WriteInMemoryIndexToDiskIfNecessary uses it to compute the index age.
  164. private DateTime _indexCreationTime;
  /// <summary>
  /// Starts tracking a batch: creates its performance stats, computes the idle time since
  /// the last completed batch of the same operation, and registers it as currently running.
  /// </summary>
  /// <param name="indexingStep">Key under which the running batch is tracked; also the initial Operation value.</param>
  /// <param name="operation">Operation name used to find the previous completed batch.</param>
  /// <param name="itemsCount">Number of items in the batch.</param>
  /// <returns>The stats object registered for the batch.</returns>
  protected IndexingPerformanceStats RecordCurrentBatch(string indexingStep, string operation, int itemsCount)
  {
      var performanceStats = new IndexingPerformanceStats
      {
          ItemsCount = itemsCount,
          Operation = indexingStep,
          Started = SystemTime.UtcNow,
          Operations = new BasePerformanceStats[0]
      };

      // AddIndexingPerformanceStats is public, so the queue may hold entries without an
      // Operation (or even null entries); the static Equals keeps the lookup from throwing.
      var lastStats = indexingPerformanceStats.LastOrDefault(
          x => x != null && string.Equals(x.Operation, operation, StringComparison.OrdinalIgnoreCase));
      if (lastStats != null)
          performanceStats.WaitingTimeSinceLastBatchCompleted = performanceStats.Started - lastStats.Completed;

      // a newer batch for the same step replaces whatever was registered before
      currentlyIndexing.AddOrUpdate(indexingStep, performanceStats, (s, stats) => performanceStats);
      return performanceStats;
  }
  /// <summary>
  /// Finishes the batch tracked under <paramref name="indexingStep"/>: stamps completion
  /// time and duration, records the counts and per-operation stats, and moves it to the
  /// history queue. Does nothing when no such batch is being tracked.
  /// </summary>
  protected void BatchCompleted(string indexingStep, string operation, int inputCount, int outputCount, List<BasePerformanceStats> operationStats)
  {
      IndexingPerformanceStats finished;
      if (currentlyIndexing.TryRemove(indexingStep, out finished) == false)
          return;

      finished.Completed = SystemTime.UtcNow;
      finished.Duration = finished.Completed - finished.Started;
      finished.Operation = operation;
      finished.InputCount = inputCount;
      finished.OutputCount = outputCount;
      finished.Operations = operationStats.ToArray();

      AddIndexingPerformanceStats(finished);
  }
  /// <summary>
  /// Appends a completed batch to the history and trims the history to the 25 newest entries.
  /// </summary>
  public void AddIndexingPerformanceStats(IndexingPerformanceStats stats)
  {
      indexingPerformanceStats.Enqueue(stats);

      IndexingPerformanceStats evicted;
      while (indexingPerformanceStats.Count > 25)
      {
          indexingPerformanceStats.TryDequeue(out evicted);
      }
  }
  /// <summary>
  /// Shuts the index down under the write lock: marks it disposed, disposes the extensions,
  /// releases the current searcher, tries to persist an in-memory index, then closes the
  /// writer (and its analyzer) and the directory. Failures of the individual steps are
  /// logged and do not stop the remaining steps.
  /// </summary>
  public void Dispose()
  {
      // Track ownership of the lock explicitly: if anything fails before the lock is held,
      // the finally block must not call Monitor.Exit (that would throw
      // SynchronizationLockException and hide the original failure).
      var lockTaken = false;
      try
      {
          // this is here so we can give good logs in the case of a long shutdown process
          Monitor.TryEnter(writeLock, 100, ref lockTaken);
          if (lockTaken == false)
          {
              var localReason = waitReason;
              if (localReason != null)
                  logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
                      localReason, PublicName);
              Monitor.Enter(writeLock, ref lockTaken);
          }

          disposed = true;

          foreach (var indexExtension in indexExtensions)
          {
              indexExtension.Value.Dispose();
          }

          if (currentIndexSearcherHolder != null)
          {
              var item = currentIndexSearcherHolder.SetIndexSearcher(null, PublicName, wait: true);
              if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
              {
                  logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
              }
          }

          try
          {
              EnsureIndexWriter();
              ForceWriteToDisk();
              WriteInMemoryIndexToDiskIfNecessary(GetLastEtagFromStats());
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error while writing in memory index to disk.", e);
          }

          if (indexWriter != null) // just in case, WriteInMemoryIndexToDiskIfNecessary recreates writer
          {
              // detach the writer first so nothing else can observe a half-closed instance
              var writer = indexWriter;
              indexWriter = null;

              try
              {
                  writer.Analyzer.Close();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
              }

              try
              {
                  writer.Dispose();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error when closing the index", e);
              }
          }

          try
          {
              directory.Dispose();
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error when closing the directory", e);
          }
      }
      finally
      {
          if (lockTaken)
              Monitor.Exit(writeLock);
      }
  }
  /// <summary>
  /// Creates the index writer if there is none yet.
  /// </summary>
  /// <exception cref="IOException">Creating the writer failed; the original error is the inner exception.</exception>
  public void EnsureIndexWriter()
  {
      try
      {
          if (indexWriter != null)
              return;

          CreateIndexWriter();
      }
      catch (IOException e)
      {
          var message = string.Format("Error when trying to create the index writer for index '{0}'.", PublicName);
          throw new IOException(message, e);
      }
  }
  /// <summary>
  /// Commits pending changes of the index writer under the write lock. Does nothing when
  /// the index is disposed, has no writer yet, or the context has no index storage.
  /// </summary>
  /// <param name="highestETag">Etag passed to the writer's Commit call.</param>
  /// <exception cref="IOException">The commit failed; the original error is the inner exception.</exception>
  public void Flush(Etag highestETag)
  {
      try
      {
          lock (writeLock)
          {
              if (disposed)
                  return;
              if (indexWriter == null)
                  return;
              if (context.IndexStorage == null)
                  return;

              waitReason = "Flush";
              try
              {
                  try
                  {
                      indexWriter.Commit(highestETag);
                  }
                  catch (Exception e)
                  {
                      HandleWriteError(e);
                      throw;
                  }
                  ResetWriteErrors();
              }
              finally
              {
                  waitReason = null;
              }
          }
      }
      catch (Exception e)
      {
          // The commit failure has already been reported to HandleWriteError above;
          // reporting the same exception again here would count one failure twice.
          throw new IOException("Error during flush for " + PublicName, e);
      }
  }
  /// <summary>
  /// Optimizes (merges the segments of) the index under the write lock, creating the
  /// writer if needed. Write failures are reported to HandleWriteError and rethrown.
  /// </summary>
  public void MergeSegments()
  {
      lock (writeLock)
      {
          waitReason = "Merge / Optimize";
          try
          {
              logIndexing.Info("Starting merge of {0}", PublicName);
              var sp = Stopwatch.StartNew();

              EnsureIndexWriter();

              try
              {
                  indexWriter.Optimize();
              }
              catch (Exception e)
              {
                  HandleWriteError(e);
                  throw;
              }

              // identify the index by name here as well, so the start and completion
              // log entries can be correlated
              logIndexing.Info("Done merging {0} - took {1}", PublicName, sp.Elapsed);

              ResetWriteErrors();
          }
          finally
          {
              waitReason = null;
          }
      }
  }
  /// <summary>
  /// Implemented by concrete index types.
  /// NOTE(review): presumably indexes the documents of <paramref name="batch"/> and returns the
  /// performance stats of that run; the contract is defined by the implementations, which are
  /// not visible here - confirm there.
  /// </summary>
  350. public abstract IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token);
  /// <summary>
  /// Builds the query result for a matched Lucene document: its score, its document id,
  /// and - for projections or when all stored fields were requested - a JSON object
  /// built from the stored fields.
  /// </summary>
  protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
  {
      var result = new IndexQueryResult();
      result.Score = score.Score;
      result.Key = document.Get(Constants.DocumentIdFieldName);

      var needsProjection = fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields;
      result.Projection = needsProjection
          ? CreateDocumentFromFields(document, fieldsToFetch)
          : null;

      return result;
  }
  /// <summary>
  /// Creates a JSON object from the stored fields of a Lucene document. The requested
  /// fields are always used; when all stored fields were requested, every field name
  /// present in the document is added as well.
  /// </summary>
  public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
  {
      IEnumerable<string> requestedNames = fieldsToFetch.Fields;
      if (fieldsToFetch.FetchAllStoredFields)
      {
          var storedNames = document.GetFields().Select(x => x.Name);
          requestedNames = requestedNames.Concat(storedNames);
      }

      var result = new RavenJObject();
      // the set removes duplicate names before the fields are copied
      AddFieldsToDocument(document, new HashSet<string>(requestedNames), result);
      return result;
  }
  /// <summary>
  /// Copies the stored values of the given fields from a Lucene document into a JSON object.
  /// Helper fields (names ending in "_IsArray", "_Range" or "_ConvertToJson") are skipped.
  /// A field becomes a JSON array when it occurs more than once or when the document
  /// carries a matching "_IsArray" marker field.
  /// </summary>
  /// <param name="document">Lucene document to read from.</param>
  /// <param name="fieldNames">Names of the fields to copy.</param>
  /// <param name="documentFromFields">JSON object that receives the values.</param>
  protected static void AddFieldsToDocument(Document document, HashSet<string> fieldNames, RavenJObject documentFromFields)
  {
      foreach (var fldName in fieldNames)
      {
          // The suffixes are technical markers, so compare them ordinally rather than
          // with the current culture.
          if (fldName.EndsWith("_IsArray", StringComparison.Ordinal) ||
              fldName.EndsWith("_Range", StringComparison.Ordinal) ||
              fldName.EndsWith("_ConvertToJson", StringComparison.Ordinal))
              continue;

          // The marker does not change while the values of this field are copied,
          // so look it up once per field name instead of once per value.
          var markedAsArray = document.GetField(fldName + "_IsArray") != null;

          foreach (var field in document.GetFields(fldName))
          {
              var val = CreateProperty(field, document);

              RavenJToken arrayToken;
              var tryGetValue = documentFromFields.TryGetValue(field.Name, out arrayToken);

              if (tryGetValue || markedAsArray)
              {
                  var array = arrayToken as RavenJArray;
                  if (array == null)
                  {
                      // promote an existing scalar to an array, or start an empty one
                      documentFromFields[field.Name] = array =
                          (tryGetValue ? new RavenJArray { arrayToken } : new RavenJArray());
                  }
                  array.Add(val);
              }
              else
              {
                  documentFromFields[field.Name] = val;
              }
          }
      }
  }
  /// <summary>
  /// Calls OnIndexEntryDeleted on every batcher for each index entry matching
  /// <paramref name="term"/>. Skipped entirely when no batcher asks for the deleted
  /// document. Trigger failures are logged, recorded on the context and ignored.
  /// </summary>
  protected void InvokeOnIndexEntryDeletedOnAllBatchers(List<AbstractIndexUpdateTriggerBatcher> batchers, Term term)
  {
      var documentRequired = batchers.Any(batcher => batcher.RequiresDocumentOnIndexEntryDeleted);
      if (documentRequired == false)
          return;

      // find all documents
      var key = term.Text;

      IndexSearcher searcher = null;
      using (GetSearcher(out searcher))
      {
          var allMatches = new GatherAllCollector();
          searcher.Search(new TermQuery(term), allMatches);

          foreach (var match in allMatches.ToTopDocs().ScoreDocs)
          {
              var document = searcher.Doc(match.Doc);
              batchers.ApplyAndIgnoreAllErrors(
                  exception =>
                  {
                      var message = string.Format(
                          "Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                          PublicName, key);
                      logIndexing.WarnException(message, exception);
                      context.AddError(indexId, PublicName, key, exception, "OnIndexEntryDeleted Trigger");
                  },
                  trigger => trigger.OnIndexEntryDeleted(key, document));
          }
      }
  }
  /// <summary>
  /// Converts a stored Lucene field into a JSON token: binary fields yield their bytes,
  /// fields with a "_ConvertToJson" companion are parsed as a JSON object, and everything
  /// else is returned as a string with the null / empty-string placeholders translated back.
  /// </summary>
  private static RavenJToken CreateProperty(Field fld, Document document)
  {
      if (fld.IsBinary)
          return fld.GetBinaryValue();

      var text = fld.StringValue;

      var jsonMarker = document.GetField(fld.Name + "_ConvertToJson");
      if (jsonMarker != null)
      {
          // anything that does not parse to a JSON object yields null here
          var parsed = RavenJToken.Parse(fld.StringValue) as RavenJObject;
          return parsed;
      }

      if (text == Constants.NullValue)
          text = null;
      if (text == Constants.EmptyString)
          text = string.Empty;

      return text;
  }
  /// <summary>
  /// Runs <paramref name="action"/> against the index writer while holding the in-process
  /// write lock and the directory level "writing-to-index.lock". Afterwards the index
  /// extensions are notified, changes are persisted / flushed when needed, indexing stats
  /// are updated and the searcher is recreated if any document changed.
  /// </summary>
  /// <param name="action">Performs the actual writes; receives the writer, the analyzer and a stats object, returns what was indexed.</param>
  /// <param name="writePerformanceStats">Optional list that receives timing entries; timings are only collected when it is not null.</param>
  /// <exception cref="ObjectDisposedException">The index has already been disposed.</exception>
  /// <exception cref="InvalidOperationException">Any failure while writing; the original error is the inner exception.</exception>
  444. protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action, List<PerformanceStats> writePerformanceStats = null)
  445. {
  446. if (disposed)
  447. throw new ObjectDisposedException("Index " + PublicName + " has been disposed");
  // the stopwatches stay null unless the caller asked for performance stats
  448. Stopwatch extensionExecutionDuration = null;
  449. Stopwatch flushToDiskDuration = null;
  450. Stopwatch recreateSearcherDuration = null;
  451. if (writePerformanceStats != null)
  452. {
  453. extensionExecutionDuration = new Stopwatch();
  454. flushToDiskDuration = new Stopwatch();
  455. recreateSearcherDuration = new Stopwatch();
  456. }
  // note: these two are updated before the write lock is taken
  457. PreviousIndexTime = LastIndexTime;
  458. LastIndexTime = SystemTime.UtcNow;
  459. lock (writeLock)
  460. {
  461. bool shouldRecreateSearcher;
  // cleanup callbacks for the analyzers created by CreateAnalyzer, run in the finally below
  462. var toDispose = new List<Action>();
  463. Analyzer searchAnalyzer = null;
  464. var itemsInfo = new IndexedItemsInfo(null);
  465. bool flushed = false;
  466. try
  467. {
  468. waitReason = "Write";
  469. try
  470. {
  471. searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
  472. }
  473. catch (Exception e)
  474. {
  475. context.AddError(indexId, indexDefinition.Name, "Creating Analyzer", e, "Analyzer");
  476. throw;
  477. }
  478. EnsureIndexWriter();
  // directory level lock, held for the duration of the write and released in the finally below
  479. var locker = directory.MakeLock("writing-to-index.lock");
  480. try
  481. {
  482. var stats = new IndexingWorkStats();
  483. try
  484. {
  485. if (locker.Obtain() == false)
  486. {
  487. throw new InvalidOperationException(
  488. string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
  489. PublicName));
  490. }
  491. itemsInfo = action(indexWriter, searchAnalyzer, stats);
  492. shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;
  // hand the documents collected by AddDocumentToIndex to every registered extension
  493. foreach (var indexExtension in indexExtensions.Values)
  494. {
  495. using (StopwatchScope.For(extensionExecutionDuration, resetBeforeStart: true))
  496. {
  497. indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
  498. }
  // a timing entry is only recorded when "Extension_<name>" parses as an IndexingOperation value
  499. IndexingOperation operation;
  500. if (writePerformanceStats != null && Enum.TryParse(string.Format("Extension_{0}", indexExtension.Name), out operation))
  501. {
  502. writePerformanceStats.Add(PerformanceStats.From(operation, extensionExecutionDuration.ElapsedMilliseconds));
  503. }
  504. }
  505. }
  506. catch (Exception e)
  507. {
  // record the error on the context (with the document id for invalid spatial shapes), then rethrow
  508. var invalidSpatialShapeException = e as InvalidSpatialShapException;
  509. var invalidDocId = (invalidSpatialShapeException == null) ?
  510. null :
  511. invalidSpatialShapeException.InvalidDocumentId;
  512. context.AddError(indexId, indexDefinition.Name, invalidDocId, e, "Write");
  513. throw;
  514. }
  515. if (itemsInfo.ChangedDocs > 0)
  516. {
  517. using (StopwatchScope.For(flushToDiskDuration))
  518. {
  519. WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);
  520. if (indexWriter != null && indexWriter.RamSize() >= flushSize)
  521. {
  522. Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk
  523. flushed = true;
  524. }
  525. }
  526. UpdateIndexingStats(context, stats);
  527. }
  528. }
  529. finally
  530. {
  531. locker.Release();
  532. }
  533. }
  534. catch (Exception e)
  535. {
  // every failure above surfaces to the caller as InvalidOperationException
  536. throw new InvalidOperationException("Could not properly write to index " + PublicName, e);
  537. }
  538. finally
  539. {
  // always runs: drop the collected documents, close the analyzers, clear the wait reason
  540. currentlyIndexDocuments.Clear();
  541. if (searchAnalyzer != null)
  542. searchAnalyzer.Close();
  543. foreach (Action dispose in toDispose)
  544. {
  545. dispose();
  546. }
  547. waitReason = null;
  548. LastIndexTime = SystemTime.UtcNow;
  549. }
  // only reached when the write succeeded (the catch above always throws)
  550. if (flushed)
  551. {
  552. try
  553. {
  554. HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo());
  555. }
  556. catch (Exception e)
  557. {
  558. logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
  559. }
  560. }
  561. if (shouldRecreateSearcher)
  562. {
  563. using (StopwatchScope.For(recreateSearcherDuration))
  564. {
  565. RecreateSearcher();
  566. }
  567. }
  568. }
  569. if (writePerformanceStats != null)
  570. {
  571. writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_FlushToDisk, flushToDiskDuration.ElapsedMilliseconds));
  572. writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_RecreateSearcher, recreateSearcherDuration.ElapsedMilliseconds));
  573. }
  574. }
  /// <summary>
  /// Returns the current segments info of the index, or null while the index
  /// is backed by a RAM directory.
  /// </summary>
  private IndexSegmentsInfo GetCurrentSegmentsInfo()
  {
      var isInMemory = directory is RAMDirectory;
      return isInMemory
          ? null
          : IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory);
  }
  /// <summary>
  /// Implemented by concrete index types. Write calls it after a successful write that ended
  /// with a flush, passing what was indexed and the current segments info (null for a RAM
  /// directory); exceptions thrown from here are logged by Write and ignored.
  /// </summary>
  581. protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo);
  /// <summary>
  /// Persists the map or reduce statistics of a batch. A write conflict is not a failure:
  /// the update is retried (after a short sleep) up to ten times and then silently given
  /// up, since slightly outdated stats are acceptable. Any other error propagates.
  /// </summary>
  /// <exception cref="ArgumentOutOfRangeException">The stats carry an unknown operation.</exception>
  protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
  {
      var attempt = 0;
      var conflicted = true;
      while (attempt < 10 && conflicted)
      {
          conflicted = false;
          switch (stats.Operation)
          {
              case IndexingWorkStats.Status.Ignore:
                  break;
              case IndexingWorkStats.Status.Map:
                  workContext.TransactionalStorage.Batch(accessor =>
                  {
                      try
                      {
                          accessor.Indexing.UpdateIndexingStats(indexId, stats);
                      }
                      catch (Exception e)
                      {
                          if (accessor.IsWriteConflict(e) == false)
                              throw;
                          conflicted = true;
                      }
                  });
                  break;
              case IndexingWorkStats.Status.Reduce:
                  workContext.TransactionalStorage.Batch(accessor =>
                  {
                      try
                      {
                          accessor.Indexing.UpdateReduceStats(indexId, stats);
                      }
                      catch (Exception e)
                      {
                          if (accessor.IsWriteConflict(e) == false)
                              throw;
                          conflicted = true;
                      }
                  });
                  break;
              default:
                  throw new ArgumentOutOfRangeException();
          }

          // back off briefly before the next attempt
          if (conflicted)
              Thread.Sleep(11);

          attempt++;
      }
  }
  /// <summary>
  /// Creates a fresh snapshot deletion policy and index writer over the current directory.
  /// </summary>
  /// <exception cref="IOException">Creation failed; the error is reported to HandleWriteError and wrapped.</exception>
  private void CreateIndexWriter()
  {
      try
      {
          snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

          // reader warmers are optional; without them the writer gets no warmer at all
          IndexWriter.IndexReaderWarmer warmer = null;
          if (context.IndexReaderWarmers != null)
              warmer = new IndexReaderWarmersWrapper(indexDefinition.Name, context.IndexReaderWarmers);

          indexWriter = new RavenIndexWriter(
              directory,
              stopAnalyzer,
              snapshotter,
              IndexWriter.MaxFieldLength.UNLIMITED,
              context.Configuration.Indexing.MaxWritesBeforeRecreate,
              warmer);
      }
      catch (Exception e)
      {
          HandleWriteError(e);
          throw new IOException("Failure to create index writer for " + PublicName, e);
      }
  }
  /// <summary>
  /// Moves an index that still lives in a RAM directory to disk when it is forced, too big,
  /// too old, or up to date enough. Does nothing when the database runs in memory, when
  /// the index definition storage is not available yet, or when the writer is not backed
  /// by a RAM directory.
  /// </summary>
  /// <param name="highestETag">Etag passed to the commit and to the up-to-date check.</param>
  internal void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
  {
      if (context.Configuration.Core.RunInMemory)
          return;
      if (context.IndexDefinitionStorage == null) // may happen during index startup
          return;

      var ramDirectory = indexWriter.Directory as RAMDirectory;
      if (ramDirectory == null)
          return;

      var upToDate = IsUpToDateEnoughToWriteToDisk(highestETag);
      var currentSize = new Size(ramDirectory.SizeInBytes(), SizeUnit.Bytes);
      var tooBig = currentSize >= context.Configuration.Indexing.NewIndexInMemoryMaxSize;
      var age = SystemTime.UtcNow - _indexCreationTime;
      var tooOld = age > context.Configuration.Indexing.NewIndexInMemoryMaxTime.AsTimeSpan;

      var shouldPersist = forceWriteToDisk || tooBig || upToDate || tooOld;
      if (shouldPersist == false)
          return;

      indexWriter.Commit(highestETag);

      var physicalDirectory = context.IndexStorage.MakeRAMDirectoryPhysical(ramDirectory, indexDefinition);
      IndexStorage.WriteIndexVersion(physicalDirectory, indexDefinition);
      directory = physicalDirectory;

      // close the writer over the RAM directory, then reopen one over the new directory
      indexWriter.Dispose(true);
      ramDirectory.Dispose();

      CreateIndexWriter();

      ResetWriteErrors();
  }
  /// <summary>
  /// Implemented by concrete index types. A true result is one of the conditions that makes
  /// WriteInMemoryIndexToDiskIfNecessary persist an in-memory index.
  /// </summary>
  676. protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
  /// <summary>
  /// Builds the per-field analyzer for this index from the index definition: an optional
  /// override of the default analyzer, the explicitly configured per-field analyzers, a
  /// keyword analyzer for not-analyzed fields and a standard analyzer for analyzed fields
  /// without an explicit analyzer.
  /// </summary>
  /// <param name="defaultAnalyzer">Analyzer used for fields without a specific one, unless the definition overrides it.</param>
  /// <param name="toDispose">Receives a close callback for every analyzer created or passed in.</param>
  /// <param name="forQuerying">When true, analyzers marked with NotForQueryingAttribute are not registered.</param>
  public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
  {
      toDispose.Add(defaultAnalyzer.Close);

      string allFieldsAnalyzerName;
      if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out allFieldsAnalyzerName))
      {
          defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, allFieldsAnalyzerName);
          toDispose.Add(defaultAnalyzer.Close);
      }

      var wrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);

      foreach (var configured in indexDefinition.Analyzers)
      {
          Analyzer instance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
          toDispose.Add(instance.Close);

          // the instance is still closed through toDispose even when it is skipped here
          if (forQuerying &&
              instance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0)
              continue;

          wrapper.AddAnalyzer(configured.Key, instance);
      }

      // both are created lazily and shared between all fields that need them
      StandardAnalyzer sharedStandard = null;
      KeywordAnalyzer sharedKeyword = null;

      foreach (var fieldIndexing in indexDefinition.Indexes)
      {
          if (fieldIndexing.Value == FieldIndexing.NotAnalyzed)
          {
              if (sharedKeyword == null)
              {
                  sharedKeyword = new KeywordAnalyzer();
                  toDispose.Add(sharedKeyword.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, sharedKeyword);
          }
          else if (fieldIndexing.Value == FieldIndexing.Analyzed)
          {
              // an explicitly configured analyzer wins over the standard one
              if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
                  continue;

              if (sharedStandard == null)
              {
                  sharedStandard = new RavenStandardAnalyzer(Version.LUCENE_29);
                  toDispose.Add(sharedStandard.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, sharedStandard);
          }
      }

      return wrapper;
  }
  /// <summary>
  /// Convenience overload for callers that do not need the error callback.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
  {
      Action<Exception, object> unusedErrorCallback;
      var results = RobustEnumerationIndex(input, funcs, stats, out unusedErrorCallback, linqExecutionDuration);
      return results;
  }
  /// <summary>
  /// Runs the map functions over <paramref name="input"/> through a RobustEnumerator.
  /// Indexing attempts are counted on <paramref name="stats"/>; failures are recorded on
  /// the context, logged and counted as indexing errors.
  /// </summary>
  /// <param name="onErrorFunc">Receives the error callback that is also given to the enumerator.</param>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, out Action<Exception, object> onErrorFunc, Stopwatch linqExecutionDuration)
  {
      Action<Exception, object> reportMapError = (exception, item) =>
      {
          // invalid spatial shapes know which document caused them
          var spatialError = exception as InvalidSpatialShapException;
          string docId = spatialError != null ? spatialError.InvalidDocumentId : null;

          context.AddError(indexId, indexDefinition.Name, docId ?? TryGetDocKey(item), exception, "Map");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, TryGetDocKey(item));
          logIndexing.WarnException(message, exception);

          stats.IndexingErrors++;
      };
      onErrorFunc = reportMapError;

      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          beforeMoveNext: () => Interlocked.Increment(ref stats.IndexingAttempts),
          cancelMoveNext: () => Interlocked.Decrement(ref stats.IndexingAttempts),
          onError: reportMapError);
      robustEnumerator.MoveNextDuration = linqExecutionDuration;

      return robustEnumerator.RobustEnumeration(input, funcs);
  }
  /// <summary>
  /// Runs the reduce function over <paramref name="input"/> through a RobustEnumerator.
  /// Reduce attempts are counted on <paramref name="stats"/>; failures are recorded on the
  /// context, logged and counted as reduce errors.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          beforeMoveNext: () => Interlocked.Increment(ref stats.ReduceAttempts),
          cancelMoveNext: () => Interlocked.Decrement(ref stats.ReduceAttempts),
          onError: (exception, item) =>
          {
              var key = TryGetDocKey(item);

              context.AddError(indexId, indexDefinition.Name, key, exception, "Reduce");

              var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, key);
              logIndexing.WarnException(message, exception);

              stats.ReduceErrors++;
          });
      robustEnumerator.MoveNextDuration = linqExecutionDuration;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Runs the reduce function during the map phase through a RobustEnumerator.
  /// No map/reduce statistics are tracked here because this is merely an optimization
  /// step; failures are still recorded on the context and logged, identified by the
  /// document keys that could be determined.
  /// </summary>
  protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func, Stopwatch reduceDuringMapLinqExecution)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          onError: (exception, item) =>
          {
              var concatenatedKeys = string.Join(";", TryGetDocKeys(input, item));

              context.AddError(indexId, indexDefinition.Name, concatenatedKeys, exception, "Reduce");

              var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, concatenatedKeys);
              logIndexing.WarnException(message, exception);
          });
      robustEnumerator.MoveNextDuration = reduceDuringMapLinqExecution;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Best-effort lookup of the document keys involved in a failure. Uses the key of
  /// <paramref name="current"/> when it has one; otherwise rewinds <paramref name="input"/>
  /// and collects the distinct keys of all its items.
  /// </summary>
  /// <param name="input">Enumerator that was being processed when the failure occurred.</param>
  /// <param name="current">Item that was being processed when the failure occurred.</param>
  /// <returns>The distinct keys found; empty when none could be determined.</returns>
  private static IEnumerable<string> TryGetDocKeys(IEnumerator<object> input, object current)
  {
      var keys = new HashSet<string>();

      var key = TryGetDocKey(current);
      if (string.IsNullOrEmpty(key) == false)
      {
          keys.Add(key);
          return keys;
      }

      try
      {
          input.Reset();
      }
      catch (NotSupportedException)
      {
          // Enumerators are allowed to not support Reset (compiler generated iterators
          // never do). This runs while an error is being reported, so give up on the
          // keys instead of replacing the original error with this one.
          return keys;
      }

      while (input.MoveNext())
      {
          key = TryGetDocKey(input.Current);
          if (string.IsNullOrEmpty(key))
              continue;
          keys.Add(key);
      }

      return keys;
  }
  /// <summary>
  /// Extracts the document id - or, failing that, the reduce key - from a dynamic JSON
  /// object. Returns null for any other kind of object or when neither value is present.
  /// </summary>
  public static string TryGetDocKey(object current)
  {
      var json = current as DynamicJsonObject;
      if (json == null)
          return null;

      object id = json.GetValue(Constants.DocumentIdFieldName);
      if (id == null)
          id = json.GetValue(Constants.ReduceKeyFieldName);

      return id == null ? null : id.ToString();
  }
  /// <summary>
  /// Implemented by concrete index types.
  /// NOTE(review): presumably removes the index entries belonging to the given document keys -
  /// the implementations are not visible here, confirm there.
  /// </summary>
  835. public abstract void Remove(string[] keys, WorkContext context);
  /// <summary>
  /// Returns the current state holder of the searcher holder.
  /// </summary>
  internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
  {
      var currentState = currentIndexSearcherHolder.GetCurrentStateHolder();
      return currentState;
  }
  /// <summary>
  /// Hands out the current searcher; dispose the returned handle when done with it.
  /// </summary>
  internal IDisposable GetSearcher(out IndexSearcher searcher)
  {
      IDisposable handle = currentIndexSearcherHolder.GetSearcher(out searcher);
      return handle;
  }
  /// <summary>
  /// Hands out the current searcher together with its term documents; dispose the
  /// returned handle when done with them.
  /// </summary>
  internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
  {
      IDisposable handle = currentIndexSearcherHolder.GetSearcherAndTermDocs(out searcher, out termsDocs);
      return handle;
  }
  /// <summary>
  /// Installs a new searcher: over the writer's reader when a writer exists, otherwise
  /// a read-only searcher directly over the directory.
  /// </summary>
  private void RecreateSearcher()
  {
      IndexSearcher freshSearcher;
      if (indexWriter != null)
      {
          var indexReader = indexWriter.GetReader();
          freshSearcher = new IndexSearcher(indexReader);
      }
      else
      {
          freshSearcher = new IndexSearcher(directory, true);
      }

      currentIndexSearcherHolder.SetIndexSearcher(freshSearcher, PublicName, wait: false);
  }
  /// <summary>
  /// Adds a Lucene document to the index, letting every registered analyzer generator
  /// replace the analyzer used for this particular document.
  /// </summary>
  /// <param name="currentIndexWriter">Writer the document is added to.</param>
  /// <param name="luceneDoc">Document to add. Reader values of its fields are disposed before returning, whether or not the add succeeded.</param>
  /// <param name="analyzer">Analyzer to start from; it is owned by the caller and never closed here.</param>
  protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
  {
      Analyzer newAnalyzer = AnalyzerGenerators.Aggregate(analyzer,
          (currentAnalyzer, generator) =>
          {
              Analyzer generateAnalyzer =
                  generator.Value.GenerateAnalyzerForIndexing(PublicName, luceneDoc, currentAnalyzer);

              // an intermediate analyzer that was superseded is closed, the caller's analyzer is left alone
              if (generateAnalyzer != currentAnalyzer && currentAnalyzer != analyzer)
                  currentAnalyzer.Close();

              return generateAnalyzer;
          });

      try
      {
          // index extensions get their own copy of the document
          if (indexExtensions.Count > 0)
              currentlyIndexDocuments.Add(CloneDocument(luceneDoc));

          currentIndexWriter.AddDocument(luceneDoc, newAnalyzer);
      }
      finally
      {
          try
          {
              // dispose all the readers, also when adding the document failed,
              // otherwise they would stay open until finalization
              foreach (var fieldable in luceneDoc.GetFields())
              {
                  using (fieldable.ReaderValue)
                  {
                  }
              }
          }
          finally
          {
              if (newAnalyzer != analyzer)
                  newAnalyzer.Close();
          }
      }
  }
  /// <summary>
  /// Records the current time (<c>SystemTime.UtcNow</c>) as the last time this index was queried.
  /// </summary>
  public void MarkQueried()
  {
      var now = SystemTime.UtcNow;
      lastQueryTime = now;
  }
  /// <summary>
  /// Records <paramref name="time"/> as the last query time, unless a time that is
  /// equal or later has already been recorded.
  /// </summary>
  /// <param name="time">The time at which the index was queried.</param>
  public void MarkQueried(DateTime time)
  {
      var isNewer = lastQueryTime == null || lastQueryTime.Value < time;
      if (isNewer)
          lastQueryTime = time;
  }
  /// <summary>
  /// Looks up the index extension registered under exactly the given key.
  /// </summary>
  /// <param name="indexExtensionKey">Key the extension was registered with.</param>
  /// <returns>The extension, or the default value (<c>null</c>) when nothing is registered under that key.</returns>
  public IIndexExtension GetExtension(string indexExtensionKey)
  {
      IIndexExtension extension;
      if (indexExtensions.TryGetValue(indexExtensionKey, out extension))
          return extension;

      return default(IIndexExtension);
  }
  /// <summary>
  /// Returns the first registered index extension whose key starts with the given prefix.
  /// </summary>
  /// <param name="indexExtensionKeyPrefix">Prefix to match against the registered keys.</param>
  /// <returns>The first match in enumeration order, or the default value (<c>null</c>) when no key matches.</returns>
  public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
  {
      foreach (var registered in indexExtensions)
      {
          if (registered.Key.StartsWith(indexExtensionKeyPrefix))
              return registered.Value;
      }

      return default(IIndexExtension);
  }
  /// <summary>
  /// Registers an index extension under the given key.
  /// </summary>
  /// <remarks>
  /// Registration goes through <c>TryAdd</c> and its result is ignored, so a key
  /// that is already taken keeps its existing extension.
  /// </remarks>
  /// <param name="indexExtensionKey">Key to register the extension under.</param>
  /// <param name="extension">The extension to register.</param>
  public void SetExtension(string indexExtensionKey, IIndexExtension extension)
  {
      // first registration wins; the boolean outcome is deliberately not inspected
      indexExtensions.TryAdd(indexExtensionKey, extension);
  }
  /// <summary>
  /// Creates a copy of a Lucene document made of freshly created fields.
  /// </summary>
  /// <remarks>
  /// Numeric, binary and string valued fields are copied. Fields that have neither a
  /// binary nor a string value (probably token streams) are left out of the copy.
  /// </remarks>
  /// <param name="luceneDoc">The document to copy.</param>
  /// <returns>A new document holding the copied fields.</returns>
  private static Document CloneDocument(Document luceneDoc)
  {
      var copy = new Document();

      foreach (AbstractField source in luceneDoc.GetFields())
      {
          var storeMode = source.IsStored ? Field.Store.YES : Field.Store.NO;

          var numeric = source as NumericField;
          if (numeric != null)
          {
              var numericCopy = new NumericField(numeric.Name, storeMode, numeric.IsIndexed);

              // carry the value over using the setter that matches its runtime type
              var boxedValue = numeric.NumericValue;
              if (boxedValue is int)
                  numericCopy.SetIntValue((int)boxedValue);
              else if (boxedValue is long)
                  numericCopy.SetLongValue((long)boxedValue);
              else if (boxedValue is double)
                  numericCopy.SetDoubleValue((double)boxedValue);
              else if (boxedValue is float)
                  numericCopy.SetFloatValue((float)boxedValue);

              copy.Add(numericCopy);
              continue;
          }

          if (source.IsBinary)
          {
              var binaryCopy = new Field(source.Name, source.GetBinaryValue(), storeMode);
              copy.Add(binaryCopy);
              continue;
          }

          var text = source.StringValue;
          if (text == null)
          {
              //probably token stream, and we can't handle fields with token streams, so we skip this.
              continue;
          }

          var indexMode = source.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS;
          var termVectorMode = source.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO;
          var textCopy = new Field(source.Name, text, storeMode, indexMode, termVectorMode);
          copy.Add(textCopy);
      }

      return copy;
  }
  /// <summary>
  /// Writes a debug log entry listing the fields of a document produced by indexing.
  /// Does nothing when debug logging is disabled.
  /// </summary>
  /// <param name="key">Key the document was produced for; used in the log message only.</param>
  /// <param name="luceneDoc">The document whose fields are listed.</param>
  protected void LogIndexedDocument(string key, Document luceneDoc)
  {
      if (logIndexing.IsDebugEnabled == false)
          return;

      // one line per field: name, I/S flags for indexed/stored, then the value
      var description = new StringBuilder();
      foreach (var field in luceneDoc.GetFields())
      {
          var shownValue = field.IsBinary ? "<binary>" : field.StringValue;
          var indexedFlag = field.IsIndexed ? "I" : "-";
          var storedFlag = field.IsStored ? "S" : "-";

          description.Append("\t").Append(field.Name)
              .Append(" ")
              .Append(indexedFlag)
              .Append(storedFlag)
              .Append(": ")
              .Append(shownValue)
              .AppendLine();
      }

      // the level is checked once more right before emitting
      if (logIndexing.IsDebugEnabled)
      {
          logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, PublicName,
              description.ToString());
      }
  }
  /// <summary>
  /// Verifies that every field a query filters or sorts on is known to the index.
  /// </summary>
  /// <remarks>
  /// A trailing <c>_Range</c> suffix is stripped before a field is checked. An index that
  /// contains the catch-all field <c>_</c> has dynamic field names and accepts any field.
  /// </remarks>
  /// <param name="indexQuery">The query whose query text and sorted fields are inspected.</param>
  /// <param name="viewGenerator">The view generator that knows which fields the index contains.</param>
  /// <exception cref="ArgumentException">A field is not indexed, or alpha numeric sorting was requested without a field name.</exception>
  [CLSCompliant(false)]
  public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
  {
      const string rangeSuffix = "_Range";

      if (string.IsNullOrWhiteSpace(indexQuery.Query) == false)
      {
          foreach (string queriedField in SimpleQueryParser.GetFields(indexQuery))
          {
              string name = queriedField.EndsWith(rangeSuffix)
                  ? queriedField.Substring(0, queriedField.Length - rangeSuffix.Length)
                  : queriedField;

              if (viewGenerator.ContainsField(name))
                  continue;
              if (viewGenerator.ContainsField("_")) // the catch all field name means that we have dynamic fields names
                  continue;

              throw new ArgumentException("The field '" + name + "' is not indexed, cannot query on fields that are not indexed");
          }
      }

      if (indexQuery.SortedFields == null)
          return;

      foreach (SortedField sortedField in indexQuery.SortedFields)
      {
          string name = sortedField.Field;

          // sorting by score does not involve an indexed field
          if (name == Constants.TemporaryScoreValue)
              continue;

          if (name.EndsWith(rangeSuffix))
              name = name.Substring(0, name.Length - rangeSuffix.Length);

          if (name.StartsWith(Constants.RandomFieldName) || name.StartsWith(Constants.CustomSortFieldName))
              continue;

          if (name.StartsWith(Constants.AlphaNumericFieldName))
          {
              // alpha numeric sorting wraps the real field name
              name = SortFieldHelper.CustomField(name).Name;
              if (string.IsNullOrEmpty(name))
                  throw new ArgumentException("Alpha numeric sorting requires a field name");
          }

          var canSortOn = viewGenerator.ContainsField(name)
                          || name.StartsWith(Constants.DistanceFieldName)
                          || viewGenerator.ContainsField("_"); // the catch all field name means that we have dynamic fields names
          if (canSortOn == false)
              throw new ArgumentException("The field '" + name + "' is not indexed, cannot sort on fields that are not indexed");
      }
  }
  1040. #region Nested type: IndexQueryOperation
  1041. public class IndexQueryOperation
  1042. {
  // --- supplied through the constructor ---
  private readonly Index parent;
  private readonly IndexQuery indexQuery;
  private readonly Func<IndexQueryResult, bool> shouldIncludeInResults;
  private readonly FieldsToFetch fieldsToFetch;
  private readonly OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers;
  private readonly List<string> reduceKeys;

  // --- duplicate tracking while paging ---
  // only created by the constructor for distinct queries
  private readonly HashSet<RavenJObject> alreadySeenProjections;
  private readonly HashSet<string> alreadySeenDocumentKeysInPreviousPage = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
  private bool hasMultipleIndexOutputs;
  private int alreadyScannedForDuplicates;

  // --- highlighting, set up by SetupHighlighter when the query asks for highlighted fields ---
  private FastVectorHighlighter highlighter;
  private FieldQuery fieldQuery;

  // accumulates the time spent in GetDocumentQuery(string, IndexQuery)
  private readonly Stopwatch _queryParseDuration = new Stopwatch();
  /// <summary>
  /// Total time measured so far by the query parsing stopwatch.
  /// </summary>
  public TimeSpan QueryParseDuration
  {
      get
      {
          return _queryParseDuration.Elapsed;
      }
  }
  /// <summary>
  /// Creates a query operation bound to an index and a query.
  /// </summary>
  /// <param name="parent">The index the query runs against.</param>
  /// <param name="indexQuery">The query to execute.</param>
  /// <param name="shouldIncludeInResults">Predicate consulted for every candidate result.</param>
  /// <param name="fieldsToFetch">Fields to fetch; a distinct query additionally enables projection de-duplication.</param>
  /// <param name="indexQueryTriggers">Triggers applied to the built query.</param>
  /// <param name="reduceKeys">Optional reduce keys; used by <c>IndexEntries</c> to filter the returned entries.</param>
  public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults, FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers, List<string> reduceKeys = null)
  {
      this.parent = parent;
      this.indexQuery = indexQuery;
      this.indexQueryTriggers = indexQueryTriggers;
      this.shouldIncludeInResults = shouldIncludeInResults;
      this.reduceKeys = reduceKeys;
      this.fieldsToFetch = fieldsToFetch;

      // the projection set is only needed (and only created) for distinct queries
      alreadySeenProjections = fieldsToFetch.IsDistinctQuery
          ? new HashSet<RavenJObject>(RavenJTokenEqualityComparer.Default)
          : null;
  }
  /// <summary>
  /// Lazily returns the raw index entries matching the query, starting at the query's start position.
  /// </summary>
  /// <remarks>
  /// Properties whose name ends with <c>_Range</c> are removed from each returned entry.
  /// When reduce keys were supplied to the constructor, only entries whose reduce key
  /// field is one of them are returned.
  /// </remarks>
  /// <param name="totalResults">Receives the total number of hits reported by the search.</param>
  /// <returns>Clones of the matching terms documents.</returns>
  public IEnumerable<RavenJObject> IndexEntries(Reference<int> totalResults)
  {
      var filterByReduceKey = reduceKeys != null && reduceKeys.Count > 0;
      HashSet<RavenJToken> wantedReduceKeys = null;
      if (filterByReduceKey)
      {
          wantedReduceKeys = new HashSet<RavenJToken>(RavenJTokenEqualityComparer.Default);
          foreach (var reduceKey in reduceKeys)
              wantedReduceKeys.Add(new RavenJValue(reduceKey));
      }

      parent.MarkQueried();
      using (CultureHelper.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);

          IndexSearcher indexSearcher;
          RavenJObject[] termsDocs;
          using (parent.GetSearcherAndTermsDocs(out indexSearcher, out termsDocs))
          {
              var luceneQuery = GetDocumentQuery();
              TopDocs hits = ExecuteQuery(indexSearcher, luceneQuery, indexQuery.Start, indexQuery.PageSize, indexQuery);
              totalResults.Value = hits.TotalHits;

              for (int i = indexQuery.Start; i < hits.ScoreDocs.Length; i++)
              {
                  var hit = hits.ScoreDocs[i];

                  // work on a clone so the shared terms document stays untouched
                  var entry = (RavenJObject)termsDocs[hit.Doc].CloneToken();

                  var rangeKeys = entry.Where(x => x.Key.EndsWith("_Range")).Select(x => x.Key).ToArray();
                  foreach (var rangeKey in rangeKeys)
                      entry.Remove(rangeKey);

                  if (filterByReduceKey == false)
                  {
                      yield return entry;
                      continue;
                  }

                  RavenJToken reduceKeyValue;
                  if (entry.TryGetValue(Constants.ReduceKeyFieldName, out reduceKeyValue) == false)
                      continue;

                  if (wantedReduceKeys.Contains(reduceKeyValue))
                      yield return entry;
              }
          }
      }
  }
  /// <summary>
  /// Lazily executes the query and returns up to one page of results.
  /// </summary>
  /// <remarks>
  /// Results rejected by <c>ShouldIncludeInResults</c> are counted in the query's skipped results.
  /// While the page is not full and the search has not returned all of its hits, the search
  /// is repeated asking for more documents.
  /// </remarks>
  /// <param name="token">Checked before every search and before every result.</param>
  /// <returns>The results of the requested page.</returns>
  /// <exception cref="IndexDisabledException">The index priority carries the error flag.</exception>
  public IEnumerable<IndexQueryResult> Query(CancellationToken token)
  {
      if (parent.Priority.HasFlag(IndexingPriority.Error))
          throw new IndexDisabledException("The index has been disabled due to errors");

      parent.MarkQueried();
      using (CultureHelper.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);

          IndexSearcher indexSearcher;
          using (parent.GetSearcher(out indexSearcher))
          {
              var luceneQuery = GetDocumentQuery();

              int start = indexQuery.Start;
              int pageSize = indexQuery.PageSize;
              int yielded = 0;
              bool exhausted;

              // how many index entries a single document may have produced
              int outputsPerDocument;
              if (parent.MaxIndexOutputsPerDocument == null)
              {
                  outputsPerDocument = parent.IsMapReduce
                      ? parent.context.Configuration.Indexing.MaxMapReduceIndexOutputsPerDocument
                      : parent.context.Configuration.Indexing.MaxSimpleIndexOutputsPerDocument;

                  if (outputsPerDocument == -1) // configuration was set to disable output count check, probably because there exist fanout indexes
                      outputsPerDocument = 50;
              }
              else
              {
                  hasMultipleIndexOutputs = true;
                  outputsPerDocument = parent.MaxIndexOutputsPerDocument.Value;
              }

              var docsToGet = pageSize;
              var position = start;
              do
              {
                  token.ThrowIfCancellationRequested();

                  var search = ExecuteQuery(indexSearcher, luceneQuery, start, docsToGet, indexQuery);
                  indexQuery.TotalSize.Value = search.TotalHits;

                  RecordAlreadyPagedItemsInPreviousPage(start, search, indexSearcher);
                  SetupHighlighter(luceneQuery);

                  // position is kept across searches, so a repeated search continues where the last one stopped
                  for (; position < search.ScoreDocs.Length && pageSize > 0; position++)
                  {
                      token.ThrowIfCancellationRequested();

                      var scoreDoc = search.ScoreDocs[position];
                      var document = indexSearcher.Doc(scoreDoc.Doc);
                      var result = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc);

                      if (result.Key == null && !string.IsNullOrEmpty(indexQuery.HighlighterKeyName))
                          result.HighlighterKey = document.Get(indexQuery.HighlighterKeyName);

                      if (ShouldIncludeInResults(result) == false)
                      {
                          indexQuery.SkippedResults.Value++;
                          continue;
                      }

                      AddHighlighterResults(indexSearcher, scoreDoc, result);
                      AddQueryExplanation(luceneQuery, indexSearcher, scoreDoc, result);

                      yielded++;
                      yield return result;

                      if (yielded == pageSize)
                          yield break;
                  }

                  // page not full yet: ask for more next time, scaled up when documents can have several outputs
                  var stillMissing = pageSize - yielded;
                  docsToGet += hasMultipleIndexOutputs ? stillMissing * outputsPerDocument : stillMissing;

                  exhausted = search.TotalHits == search.ScoreDocs.Length;
              } while (yielded < pageSize && exhausted == false);
          }
      }
  }
  /// <summary>
  /// Remembers what was already returned on earlier pages so that the current page
  /// does not return it again.
  /// </summary>
  /// <remarks>
  /// Nothing is recorded for the first page (<paramref name="start"/> is 0) or when the
  /// query asks to skip duplicate checking.
  /// </remarks>
  /// <param name="start">Position of the first result of the current page.</param>
  /// <param name="search">The search results, which include the earlier pages.</param>
  /// <param name="indexSearcher">Searcher used to load the documents of earlier hits.</param>
  private void RecordAlreadyPagedItemsInPreviousPage(int start, TopDocs search, IndexSearcher indexSearcher)
  {
      if (start == 0 || indexQuery.SkipDuplicateChecking)
          return;

      // we are paging, we need to check that we don't have duplicates in the previous pages
      // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
      var checkDocumentKeys = parent.IsMapReduce == false
                              && fieldsToFetch.IsProjection == false
                              && search.ScoreDocs.Length >= start;
      if (checkDocumentKeys)
      {
          Action<int> rememberKeyAt = hitIndex =>
          {
              var previousHit = search.ScoreDocs[hitIndex];
              var previousDocument = indexSearcher.Doc(previousHit.Doc);
              var alreadyPagedKey = previousDocument.Get(Constants.DocumentIdFieldName);
              alreadySeenDocumentKeysInPreviousPage.Add(alreadyPagedKey);
              hasMultipleIndexOutputs = true;
          };

          if (IsSortingQuery(indexQuery) == false)
          {
              // that's not a sorted query so we need just to ensure that we won't return the last item of the previous page
              rememberKeyAt(start - 1);
          }
          else
          {
              // we need to scan all records from the beginning to requested 'start' position
              for (int i = 0; i < start && i < search.ScoreDocs.Length; i++)
                  rememberKeyAt(i);
          }
      }

      if (fieldsToFetch.IsDistinctQuery == false)
          return;

      // alreadyScannedForDuplicates is a field, so a later call resumes where this one stopped
      while (alreadySeenProjections.Count < start && alreadyScannedForDuplicates < search.ScoreDocs.Length)
      {
          var scoreDoc = search.ScoreDocs[alreadyScannedForDuplicates];
          var document = indexSearcher.Doc(scoreDoc.Doc);
          var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc);

          if (indexQueryResult.Projection.Count > 0) // we don't consider empty projections to be relevant for distinct operations
              alreadySeenProjections.Add(indexQueryResult.Projection);

          alreadyScannedForDuplicates++;
      }
  }
  /// <summary>
  /// Tells whether the query specifies at least one sorted field.
  /// </summary>
  private bool IsSortingQuery(IndexQuery query)
  {
      var sortedFields = query.SortedFields;
      if (sortedFields == null)
          return false;

      return sortedFields.Length > 0;
  }
  /// <summary>
  /// Computes the highlighted fragments of a hit and attaches them to its result.
  /// Does nothing when no highlighter has been set up.
  /// </summary>
  /// <param name="indexSearcher">Searcher whose reader is used to compute the fragments.</param>
  /// <param name="scoreDoc">The hit to compute fragments for.</param>
  /// <param name="indexQueryResult">The result receiving the fragments, keyed by field, and - when it has a projection - also under each configured fragments field.</param>
  private void AddHighlighterResults(IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
  {
      if (highlighter == null)
          return;

      // fields that produce no fragments are left out
      var highlightings = indexQuery.HighlightedFields
          .Select(highlightedField => new
          {
              highlightedField.Field,
              highlightedField.FragmentsField,
              Fragments = highlighter.GetBestFragments(
                  fieldQuery,
                  indexSearcher.IndexReader,
                  scoreDoc.Doc,
                  highlightedField.Field,
                  highlightedField.FragmentLength,
                  highlightedField.FragmentCount)
          })
          .Where(candidate => candidate.Fragments != null && candidate.Fragments.Length > 0)
          .ToList();

      if (indexQueryResult.Projection != null)
      {
          foreach (var highlighting in highlightings)
          {
              if (string.IsNullOrEmpty(highlighting.FragmentsField))
                  continue;

              indexQueryResult.Projection[highlighting.FragmentsField] = new RavenJArray(highlighting.Fragments);
          }
      }

      indexQueryResult.Highligtings = highlightings.ToDictionary(x => x.Field, x => x.Fragments);
  }
  /// <summary>
  /// Creates the highlighter and its field query when the query requests highlighted fields;
  /// otherwise leaves both untouched.
  /// </summary>
  /// <param name="documentQuery">The Lucene query the highlighter's field query is derived from.</param>
  private void SetupHighlighter(Query documentQuery)
  {
      var requestedFields = indexQuery.HighlightedFields;
      if (requestedFields == null || requestedFields.Length == 0)
          return;

      // fall back to the colored default tags when the query does not bring its own
      var preTags = indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
          ? indexQuery.HighlighterPreTags
          : BaseFragmentsBuilder.COLORED_PRE_TAGS;
      var postTags = indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
          ? indexQuery.HighlighterPostTags
          : BaseFragmentsBuilder.COLORED_POST_TAGS;

      highlighter = new FastVectorHighlighter(
          FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
          FastVectorHighlighter.DEFAULT_FIELD_MATCH,
          new SimpleFragListBuilder(),
          new SimpleFragmentsBuilder(preTags, postTags));

      fieldQuery = highlighter.GetFieldQuery(documentQuery);
  }
  /// <summary>
  /// Stores the searcher's score explanation for a hit on its result,
  /// but only when the query asks for score explanations.
  /// </summary>
  private void AddQueryExplanation(Query documentQuery, IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
  {
      if (indexQuery.ExplainScores == false)
          return;

      indexQueryResult.ScoreExplanation = indexSearcher.Explain(documentQuery, scoreDoc.Doc).ToString();
  }
  /// <summary>
  /// Passes the query through every index query trigger in turn; each trigger receives
  /// the output of the previous one.
  /// </summary>
  /// <param name="documentQuery">The query to start from.</param>
  /// <returns>The query returned by the last trigger, or the input when there are no triggers.</returns>
  private Query ApplyIndexTriggers(Query documentQuery)
  {
      return indexQueryTriggers.Aggregate(
          documentQuery,
          (current, indexQueryTrigger) => indexQueryTrigger.Value.ProcessQuery(parent.PublicName, current, indexQuery));
  }
  /// <summary>
  /// Lazily executes an INTERSECT query: the query text is split on the intersect separator and
  /// only documents reported for all of the sub-queries are returned.
  /// </summary>
  /// <remarks>
  /// The first sub-query is executed through ExecuteQuery (so sorting applies) and bounds the candidates;
  /// the remaining sub-queries are fed into an intersection collector built over those candidates.
  /// The number of candidates requested is a guess that is doubled until the loop condition below is satisfied.
  /// </remarks>
  /// <param name="token">Checked at the start of every round of the guessing loop.</param>
  /// <returns>Up to one page of results, starting at the query's start position.</returns>
  /// <exception cref="InvalidOperationException">The query text does not contain more than one intersect clause.</exception>
  1300. public IEnumerable<IndexQueryResult> IntersectionQuery(CancellationToken token)
  1301. {
  1302. using (CultureHelper.EnsureInvariantCulture())
  1303. {
  1304. AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
  1305. IndexSearcher indexSearcher;
  1306. using (parent.GetSearcher(out indexSearcher))
  1307. {
  1308. var subQueries = indexQuery.Query.Split(new[] { Constants.IntersectSeparator }, StringSplitOptions.RemoveEmptyEntries);
  1309. if (subQueries.Length <= 1)
  1310. throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");
  1311. //Not sure how to select the page size here??? The problem is that only docs in this search can be part
  1312. //of the final result because we're doing an intersection query (but we might exclude some of them)
  1313. int pageSizeBestGuess = (indexQuery.Start + indexQuery.PageSize) * 2;
  1314. int intersectMatches = 0, skippedResultsInCurrentLoop = 0;
  1315. int previousBaseQueryMatches = 0, currentBaseQueryMatches = 0;
  1316. var firstSubDocumentQuery = GetDocumentQuery(subQueries[0], indexQuery);
  1317. //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
  1318. var search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
  1319. currentBaseQueryMatches = search.ScoreDocs.Length;
  1320. var intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
  1321. do
  1322. {
  1323. token.ThrowIfCancellationRequested();
  // skippedResultsInCurrentLoop is only non-zero from the second round on, when the previous
  // round produced fewer intersect matches than the guessed page size
  1324. if (skippedResultsInCurrentLoop > 0)
  1325. {
  1326. // We get here because out first attempt didn't get enough docs (after INTERSECTION was calculated)
  1327. pageSizeBestGuess = pageSizeBestGuess * 2;
  1328. search = ExecuteQuery(indexSearcher, firstSubDocumentQuery, 0, pageSizeBestGuess, indexQuery);
  1329. previousBaseQueryMatches = currentBaseQueryMatches;
  1330. currentBaseQueryMatches = search.ScoreDocs.Length;
  1331. intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
  1332. }
  // run every remaining sub-query (index 1 and up) into the same collector
  1333. for (int i = 1; i < subQueries.Length; i++)
  1334. {
  1335. var luceneSubQuery = GetDocumentQuery(subQueries[i], indexQuery);
  1336. indexSearcher.Search(luceneSubQuery, null, intersectionCollector);
  1337. }
  1338. var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
  1339. intersectMatches = currentIntersectResults.Count;
  1340. skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
  1341. } while (intersectMatches < indexQuery.PageSize && //stop if we've got enough results to satisfy the pageSize
  1342. currentBaseQueryMatches < search.TotalHits && //stop if increasing the page size wouldn't make any difference
  1343. previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results
  1344. var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
  1345. //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
  1346. //because it's before the INTERSECTION has been applied, so only some of those results make it out.
  1347. //Trying to give an accurate answer is going to be too costly, so we aren't going to try.
  1348. indexQuery.TotalSize.Value = search.TotalHits;
  1349. indexQuery.SkippedResults.Value = skippedResultsInCurrentLoop;
  1350. //Using the final set of results in the intersectionCollector
  1351. int returnedResults = 0;
  1352. for (int i = indexQuery.Start; i < intersectResults.Count && (i - indexQuery.Start) < pageSizeBestGuess; i++)
  1353. {
  1354. Document document = indexSearcher.Doc(intersectResults[i].LuceneId);
  // NOTE(review): search.ScoreDocs[i] is picked by position in the intersect results, while the document
  // above is loaded by intersectResults[i].LuceneId - the two may refer to different documents; confirm this is intended.
  1355. IndexQueryResult indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
  1356. if (ShouldIncludeInResults(indexQueryResult) == false)
  1357. {
  1358. indexQuery.SkippedResults.Value++;
  1359. skippedResultsInCurrentLoop++;
  1360. continue;
  1361. }
  1362. returnedResults++;
  1363. yield return indexQueryResult;
  1364. if (returnedResults == indexQuery.PageSize)
  1365. yield break;
  1366. }
  1367. }
  1368. }
  1369. }
  /// <summary>
  /// Decides whether a candidate result may be returned to the caller.
  /// </summary>
  /// <remarks>
  /// A result is rejected when the caller supplied predicate rejects it, when its key was
  /// already returned on a previous page, or - for distinct queries - when its projection
  /// was seen before. An accepted projection of a distinct query is recorded as seen.
  /// </remarks>
  /// <param name="indexQueryResult">The candidate result.</param>
  /// <returns><c>true</c> when the result should be returned.</returns>
  private bool ShouldIncludeInResults(IndexQueryResult indexQueryResult)
  {
      if (shouldIncludeInResults(indexQueryResult) == false)
          return false;

      if (alreadySeenDocumentKeysInPreviousPage.Contains(indexQueryResult.Key))
      {
          // the same key showing up again means the document has more than one index output
          hasMultipleIndexOutputs = true;
          return false;
      }

      // Add returns false for a projection that was already seen
      return fieldsToFetch.IsDistinctQuery == false
             || alreadySeenProjections.Add(indexQueryResult.Projection);
  }
  /// <summary>
  /// Builds the Lucene query for the whole index query, including the spatial part
  /// when the query is a spatial index query.
  /// </summary>
  /// <returns>
  /// The text query; for spatial queries the spatial query alone when the text query matches
  /// all documents, otherwise a boolean query requiring both.
  /// </returns>
  public Query GetDocumentQuery()
  {
      var textQuery = GetDocumentQuery(indexQuery.Query, indexQuery);

      var spatialIndexQuery = indexQuery as SpatialIndexQuery;
      if (spatialIndexQuery == null)
          return textQuery;

      var spatialField = parent.viewGenerator.GetSpatialField(spatialIndexQuery.SpatialFieldName);
      var spatialQuery = spatialField.MakeQuery(textQuery, spatialField.GetStrategy(), spatialIndexQuery);
      if (textQuery is MatchAllDocsQuery)
          return spatialQuery;

      var combined = new BooleanQuery();
      combined.Add(textQuery, Occur.MUST);
      combined.Add(spatialQuery, Occur.MUST);
      return combined;
  }
  /// <summary>
  /// Parses a query string into a Lucene query and runs the index query triggers over it.
  /// The time spent here is added to <see cref="QueryParseDuration"/>.
  /// </summary>
  /// <param name="query">The query text; null or empty means "match all documents".</param>
  /// <param name="indexQuery">The index query the text belongs to.</param>
  /// <returns>The query after all triggers were applied.</returns>
  private Query GetDocumentQuery(string query, IndexQuery indexQuery)
  {
      _queryParseDuration.Start();
      try
      {
          Query documentQuery;
          if (String.IsNullOrEmpty(query))
          {
              if (logQuerying.IsDebugEnabled)
                  logQuerying.Debug("Issuing query on index {0} for all documents", parent.PublicName);
              documentQuery = new MatchAllDocsQuery();
          }
          else
          {
              if (logQuerying.IsDebugEnabled)
                  logQuerying.Debug("Issuing query on index {0} for: {1}", parent.PublicName, query);

              var toDispose = new List<Action>();
              RavenPerFieldAnalyzerWrapper searchAnalyzer = null;
              try
              {
                  searchAnalyzer = parent.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                  searchAnalyzer = parent.AnalyzerGenerators.Aggregate(searchAnalyzer, (currentAnalyzer, generator) =>
                  {
                      Analyzer newAnalyzer = generator.GenerateAnalyzerForQuerying(parent.PublicName, indexQuery.Query, currentAnalyzer);
                      // a generator that replaced the analyzer makes the previous one (and its helpers) obsolete
                      if (newAnalyzer != currentAnalyzer)
                      {
                          DisposeAnalyzerAndFriends(toDispose, currentAnalyzer);
                      }
                      return parent.CreateAnalyzer(newAnalyzer, toDispose, true);
                  });
                  documentQuery = QueryBuilder.BuildQuery(query, indexQuery, searchAnalyzer);
              }
              finally
              {
                  DisposeAnalyzerAndFriends(toDispose, searchAnalyzer);
              }
          }

          return ApplyIndexTriggers(documentQuery);
      }
      finally
      {
          // stop also when parsing or a trigger throws, otherwise the stopwatch would keep
          // running and QueryParseDuration would keep growing
          _queryParseDuration.Stop();
      }
  }
  /// <summary>
  /// Closes an analyzer (when there is one), runs every pending dispose action and
  /// then empties the list of actions.
  /// </summary>
  /// <param name="toDispose">Pending dispose actions; cleared before returning.</param>
  /// <param name="analyzer">Analyzer to close; may be <c>null</c>.</param>
  private static void DisposeAnalyzerAndFriends(List<Action> toDispose, RavenPerFieldAnalyzerWrapper analyzer)
  {
      if (analyzer != null)
      {
          analyzer.Close();
      }

      toDispose.ForEach(dispose => dispose());
      toDispose.Clear();
  }
  /// <summary>
  /// Runs a Lucene query and returns the top documents up to and including the requested page.
  /// </summary>
  /// <param name="indexSearcher">Searcher to execute the query with.</param>
  /// <param name="documentQuery">The Lucene query.</param>
  /// <param name="start">Position of the first result of the page.</param>
  /// <param name="pageSize">Size of the page; <c>Int32.MaxValue</c> without a sort gathers all documents.</param>
  /// <param name="indexQuery">The index query, used to determine the sort.</param>
  /// <returns>The hits, starting at the first hit (not at <paramref name="start"/>).</returns>
  private TopDocs ExecuteQuery(IndexSearcher indexSearcher, Query documentQuery, int start, int pageSize,
      IndexQuery indexQuery)
  {
      var sort = indexQuery.GetSort(parent.indexDefinition, parent.viewGenerator);

      if (pageSize == Int32.MaxValue && sort == null) // we want all docs, no sorting required
      {
          var gatherAllCollector = new GatherAllCollector();
          indexSearcher.Search(documentQuery, gatherAllCollector);
          return gatherAllCollector.ToTopDocs();
      }

      // need to protect against ridiculously high values of pageSize + start that overflow:
      // the sum is computed as a long and clamped, because an int sum that wraps to exactly
      // Int32.MinValue makes Math.Abs(int) throw an OverflowException
      long absFullPage = Math.Abs((long)pageSize + start);
      var minPageSize = (int)Math.Max(Math.Min(absFullPage, Int32.MaxValue), 1);

      // NOTE: We get Start + Pagesize results back so we have something to page on
      if (sort != null)
      {
          try
          {
              //indexSearcher.SetDefaultFieldSortScoring (sort.GetSort().Contains(SortField.FIELD_SCORE), false);
              indexSearcher.SetDefaultFieldSortScoring(true, false);
              var ret = indexSearcher.Search(documentQuery, null, minPageSize, sort);
              return ret;
          }
          finally
          {
              // restore the searcher's scoring setting for whoever uses it next
              indexSearcher.SetDefaultFieldSortScoring(false, false);
          }
      }

      return indexSearcher.Search(documentQuery, null, minPageSize);
  }
  1475. }
  1476. #endregion
  /// <summary>
  /// Returns the statistics of the indexing runs currently in progress followed by
  /// the recorded indexing performance statistics.
  /// </summary>
  public IndexingPerformanceStats[] GetIndexingPerformance()
  {
      var inProgress = currentlyIndexing.Values;
      return inProgress.Concat(indexingPerformanceStats).ToArray();
  }
  /// <summary>
  /// Returns the statistics of the indexing runs currently in progress.
  /// </summary>
  public IndexingPerformanceStats[] GetCurrentIndexingPerformance()
  {
      var inProgress = currentlyIndexing.Values;
      return inProgress.ToArray();
  }
  /// <summary>
  /// Copies the files of this index into a backup directory and records which files are
  /// required to restore it.
  /// </summary>
  /// <remarks>
  /// Failures are logged and reported through <paramref name="notifyCallback"/>; for an unexpected
  /// exception a "{indexId}.backup_failed" file is written into the backup directory and the exception
  /// is not propagated. An exception only leaves this method when writing that file fails, or when
  /// releasing the snapshot fails while no such failure was already being propagated.
  /// </remarks>
  /// <param name="backupDirectory">Root directory of the backup; index files go to its "Indexes/{indexId}" sub folder.</param>
  /// <param name="path">Directory that contains the index folders; source files are read from "{path}/{indexId}".</param>
  /// <param name="incrementalTag">When not null, the backup is written into this sub directory of <paramref name="backupDirectory"/>.</param>
  /// <param name="notifyCallback">Receives failure messages; may be null.</param>
  1485. public void Backup(string backupDirectory, string path, string incrementalTag, Action<string, string, BackupStatus.BackupMessageSeverity> notifyCallback)
  1486. {
  1487. if (directory is RAMDirectory)
  1488. {
  1489. //if the index is memory-only, force writing index data to disk
  1490. Write((writer, analyzer, stats) =>
  1491. {
  1492. ForceWriteToDisk();
  1493. return new IndexedItemsInfo(GetLastEtagFromStats()) { ChangedDocs = 1 };
  1494. });
  1495. }
  1496. bool hasSnapshot = false;
  // cleared when an exception is already being rethrown, so that the finally block does not replace it
  1497. bool throwOnFinallyException = true;
  1498. try
  1499. {
  1500. var existingFiles = new HashSet<string>();
  1501. if (incrementalTag != null)
  1502. backupDirectory = Path.Combine(backupDirectory, incrementalTag);
  // allFilesPath lists every file name already copied for this index; files listed there are not copied again
  1503. var allFilesPath = Path.Combine(backupDirectory, indexId + ".all-existing-index-files");
  1504. var saveToFolder = Path.Combine(backupDirectory, "Indexes", indexId.ToString());
  1505. System.IO.Directory.CreateDirectory(saveToFolder);
  1506. if (File.Exists(allFilesPath))
  1507. {
  1508. foreach (var file in File.ReadLines(allFilesPath))
  1509. {
  1510. existingFiles.Add(file);
  1511. }
  1512. }
  // neededFilePath lists every file name the restore of this particular backup requires
  1513. var neededFilePath = Path.Combine(saveToFolder, "index-files.required-for-index-restore");
  1514. using (var allFilesWriter = File.Exists(allFilesPath) ? File.AppendText(allFilesPath) : File.CreateText(allFilesPath))
  1515. using (var neededFilesWriter = File.CreateText(neededFilePath))
  1516. {
  1517. try
  1518. {
  1519. // this is called for the side effect of creating the snapshotter and the writer
  1520. // we explicitly handle the backup outside of the write, to allow concurrent indexing
  1521. Write((writer, analyzer, stats) =>
  1522. {
  1523. // however, we copy the current segments.gen & index.version to make
  1524. // sure that we get the _at the time_ of the write.
  1525. foreach (var fileName in new[] { "segments.gen", IndexStorage.IndexVersionFileName(indexDefinition) })
  1526. {
  1527. var fullPath = Path.Combine(path, indexId.ToString(), fileName);
  1528. File.Copy(fullPath, Path.Combine(saveToFolder, fileName));
  1529. allFilesWriter.WriteLine(fileName);
  1530. neededFilesWriter.WriteLine(fileName);
  1531. }
  1532. return new IndexedItemsInfo(null);
  1533. });
  1534. }
  1535. catch (CorruptIndexException e)
  1536. {
  1537. var failureMessage = "Could not backup index " + PublicName + " because it is corrupted. Skipping the index, will force index reset on restore";
  1538. LogErrorAndNotifyStudio(notifyCallback, failureMessage, e);
  // the writer has to be closed before its file can be deleted
  1539. neededFilesWriter.Dispose();
  1540. TryDelete(neededFilePath);
  1541. return;
  1542. }
  1543. IndexCommit commit;
  1544. try
  1545. {
  1546. commit = snapshotter.Snapshot();
  1547. hasSnapshot = true;
  1548. }
  // NOTE(review): a failing snapshot is swallowed without logging; the backup then only holds the two files copied above - confirm this is intended.
  1549. catch (Exception)
  1550. {
  1551. hasSnapshot = false;
  1552. commit = null;
  1553. }
  1554. if (hasSnapshot)
  1555. {
  1556. foreach (var fileName in commit.FileNames)
  1557. {
  1558. var fullPath = Path.Combine(path, indexId.ToString(), fileName);
  // lock files and files that no longer exist on disk are neither copied nor listed
  1559. if (".lock".Equals(Path.GetExtension(fullPath), StringComparison.InvariantCultureIgnoreCase))
  1560. continue;
  1561. if (File.Exists(fullPath) == false)
  1562. continue;
  1563. if (existingFiles.Contains(fileName) == false)
  1564. {
  1565. var destFileName = Path.Combine(saveToFolder, fileName);
  1566. try
  1567. {
  1568. File.Copy(fullPath, destFileName);
  1569. }
  1570. catch (Exception e)
  1571. {
  1572. var failureMessage = "Could not backup index " + PublicName + " because failed to copy file : " + fullPath +
  1573. ". Skipping the index, will force index reset on restore";
  1574. LogErrorAndNotifyStudio(notifyCallback, failureMessage, e);
  1575. neededFilesWriter.Dispose();
  1576. TryDelete(neededFilePath);
  1577. return;
  1578. }
  1579. allFilesWriter.WriteLine(fileName);
  1580. }
  // listed as required even when the copy was skipped because the file name was already recorded
  1581. neededFilesWriter.WriteLine(fileName);
  1582. }
  1583. }
  1584. allFilesWriter.Flush();
  1585. neededFilesWriter.Flush();
  1586. }
  1587. }
  1588. catch (Exception e)
  1589. {
  1590. var failureMessage = "Could not backup index " + PublicName +
  1591. " because an unexpected exception was thrown. Skipping the index, will force index reset on restore";
  1592. LogErrorAndNotifyStudio(notifyCallback, failureMessage, e);
  1593. try
  1594. {
  1595. File.WriteAllText(Path.Combine(backupDirectory, String.Format("{0}.backup_failed", indexId)), e.ToString());
  1596. }
  1597. catch (Exception fe)
  1598. {
  1599. failureMessage = "failed to create fail index file for index " + PublicName +
  1600. " because an unexpected exception was thrown. This error may prevent auto reseting of the index on restore.";
  1601. LogErrorAndNotifyStudio(notifyCallback, failureMessage, fe);
  1602. throwOnFinallyException = false;
  // rethrows fe (the failure to write the marker file), not the original exception e
  1603. throw;
  1604. }
  1605. }
  1606. finally
  1607. {
  // the snapshot is only released when one was actually taken
  1608. if (snapshotter != null && hasSnapshot)
  1609. {
  1610. try
  1611. {
  1612. snapshotter.Release();
  1613. }
  1614. catch (Exception e)
  1615. {
  1616. var failureMessage = "Failed to release snapshotter while backing-up index " + PublicName;
  1617. LogErrorAndNotifyStudio(notifyCallback, failureMessage, e);
  1618. if (throwOnFinallyException) throw;
  1619. }
  1620. }
  1621. }
  1622. }
  /// <summary>
  /// Records a backup failure in the indexing log (as a warning that carries the exception)
  /// and, when a callback was supplied, forwards the same message to it with Error severity.
  /// </summary>
  /// <param name="notifyCallback">Optional notification callback; nothing is forwarded when it is null.</param>
  /// <param name="failureMessage">Text describing the failure; used for both the log and the callback.</param>
  /// <param name="e">Exception attached to the log entry. It is not handed to the callback.</param>
  private static void LogErrorAndNotifyStudio(Action<string, string, BackupStatus.BackupMessageSeverity> notifyCallback, string failureMessage, Exception e)
  {
      logIndexing.WarnException(failureMessage, e);

      if (notifyCallback == null)
          return;

      // the callback's second argument is always passed as null from here
      notifyCallback(failureMessage, null, BackupStatus.BackupMessageSeverity.Error);
  }
  /// <summary>
  /// Looks up the last etag stored for this index in the index storage.
  /// </summary>
  /// <returns>
  /// The value reported by the index storage, or <c>Etag.Empty</c> when the index storage
  /// has not been assigned yet (startup).
  /// </returns>
  public Etag GetLastEtagFromStats()
  {
      var storage = context.IndexStorage;
      if (storage != null)
          return storage.GetLastEtagForIndex(this);

      // startup: no index storage to ask yet
      return Etag.Empty;
  }
  /// <summary>
  /// Best-effort file removal: attempts to delete the given file and never throws.
  /// </summary>
  /// <param name="neededFilePath">Path of the file to delete.</param>
  private static void TryDelete(string neededFilePath)
  {
      try
      {
          File.Delete(neededFilePath);
      }
      catch (Exception)
      {
          // Any failure to delete is swallowed; callers get no indication
          // of whether the file was actually removed.
      }
  }
  /// <summary>
  /// Drains both queues: merges all collected document references per key and stores them,
  /// then schedules a <c>TouchReferenceDocumentIfChangedTask</c> for the missing referenced documents.
  /// </summary>
  /// <param name="actions">Storage accessor used to store the references and to enqueue the task.</param>
  /// <param name="allReferencedDocs">Queue of key -> referenced ids maps; emptied by this call.</param>
  /// <param name="missingReferencedDocs">Queue of key -> etag maps for missing references; emptied by this call.</param>
  protected void UpdateDocumentReferences(IStorageActionsAccessor actions,
      ConcurrentQueue<IDictionary<string, HashSet<string>>> allReferencedDocs,
      ConcurrentQueue<IDictionary<string, Etag>> missingReferencedDocs)
  {
      // keys are matched case-insensitively (invariant culture)
      IDictionary<string, HashSet<string>> referencesByKey =
          new Dictionary<string, HashSet<string>>(StringComparer.InvariantCultureIgnoreCase);

      IDictionary<string, HashSet<string>> batch;
      while (allReferencedDocs.TryDequeue(out batch))
      {
          foreach (var entry in batch)
          {
              HashSet<string> known;
              if (referencesByKey.TryGetValue(entry.Key, out known) == false)
              {
                  // first time this key is seen: adopt the incoming set as-is
                  referencesByKey.Add(entry.Key, entry.Value);
                  continue;
              }

              // the debug line prints the references gathered so far, before the union below
              if (logIndexing.IsDebugEnabled)
                  logIndexing.Debug("Merging references for key = {0}, references = {1}", entry.Key, String.Join(",", known));

              known.UnionWith(entry.Value);
          }
      }

      foreach (var pair in referencesByKey)
      {
          actions.Indexing.UpdateDocumentReferences(indexId, pair.Key, pair.Value);
          actions.General.MaybePulseTransaction();
      }

      // so we will get IsStale properly
      var touchTask = new TouchReferenceDocumentIfChangedTask(indexId);

      IDictionary<string, Etag> missingBatch;
      while (missingReferencedDocs.TryDequeue(out missingBatch))
      {
          foreach (var missing in missingBatch)
          {
              touchTask.UpdateReferenceToCheck(missing);
          }

          if (logIndexing.IsDebugEnabled && touchTask.NumberOfKeys > 0)
              logIndexing.Debug("Scheduled to touch documents: {0}", string.Join(";", touchTask.GetReferencesForDebug()));
      }

      // only enqueue the task when it has at least one key to check
      if (touchTask.NumberOfKeys != 0)
          actions.Tasks.AddTask(touchTask, SystemTime.UtcNow);
  }
  /// <summary>
  /// Raises the <c>forceWriteToDisk</c> flag. This method only sets the flag;
  /// it performs no write itself.
  /// </summary>
  public void ForceWriteToDisk()
  {
      this.forceWriteToDisk = true;
  }
  /// <summary>
  /// Checks whether the number of outputs produced so far for one source document is still
  /// within the allowed maximum, and tracks the largest count seen when the index definition
  /// carries its own limit.
  /// </summary>
  /// <param name="sourceDocumentId">Id of the document being indexed; used in the error message.</param>
  /// <param name="numberOfAlreadyProducedOutputs">How many outputs the document has produced so far.</param>
  /// <returns>
  /// <c>true</c> when the limit is -1 or the count does not exceed it; otherwise <c>false</c>,
  /// after logging a warning and registering an error on the context.
  /// </returns>
  protected bool EnsureValidNumberOfOutputsForDocument(string sourceDocumentId, int numberOfAlreadyProducedOutputs)
  {
      // user has specifically configured this value, but we don't trust it:
      // keep track of the highest output count that was actually observed.
      // The first comparison is a plain (non-volatile) read; a stale view can only be a
      // smaller number, which is then re-verified below in a thread safe manner.
      if (indexDefinition.MaxIndexOutputsPerDocument != null &&
          maxActualIndexOutput < numberOfAlreadyProducedOutputs)
      {
          var observedMax = Thread.VolatileRead(ref maxActualIndexOutput);
          while (observedMax < numberOfAlreadyProducedOutputs)
          {
              // CompareExchange hands back the value that was in the field before the call.
              // If another thread stored a different max in the meantime we only retry
              // while that value is still smaller than ours.
              observedMax = Interlocked.CompareExchange(
                  ref maxActualIndexOutput,
                  numberOfAlreadyProducedOutputs,
                  observedMax);
          }
      }

      // per-index setting wins; otherwise fall back to the configured default for the index kind
      var allowedOutputs = indexDefinition.MaxIndexOutputsPerDocument ??
                           (IsMapReduce
                               ? context.Configuration.Indexing.MaxMapReduceIndexOutputsPerDocument
                               : context.Configuration.Indexing.MaxSimpleIndexOutputsPerDocument);

      // a limit of -1 always passes
      if (allowedOutputs == -1 || numberOfAlreadyProducedOutputs <= allowedOutputs)
          return true;

      var warning = string.Format("Index '{0}' has already produced {1} map results for a source document '{2}', while the allowed max number of outputs is {3} per one document. " +
                                  "Please verify this index definition and consider a re-design of your entities or index.",
          PublicName, numberOfAlreadyProducedOutputs, sourceDocumentId, allowedOutputs);

      logIndexing.Warn(warning);
      context.AddError(indexId, PublicName, sourceDocumentId, warning);
      return false;
  }
  /// <summary>
  /// Reacts to a failed index write. An <see cref="IOException"/>, or reaching
  /// <c>WriteErrorsLimit</c> failures of other kinds, marks the index as errored: the Error
  /// priority is stored, a change notification is raised, and the error and an alert are recorded.
  /// </summary>
  /// <param name="e">The exception raised by the failed write.</param>
  public void HandleWriteError(Exception e)
  {
      if (disposed)
          return;

      // ignore transient errors (only an exception whose exact type is SystemException)
      if (e.GetType() == typeof(SystemException))
          return;

      // a non-null message doubles as the "index is corrupted" marker
      string errorMessage = null;
      if (e is IOException)
      {
          errorMessage = string.Format("Index '{0}' got corrupted because it failed in writing to a disk with the following exception message: {1}." +
                                       " The index priority was set to Error.", PublicName, e.Message);
      }
      else
      {
          var failuresSoFar = Interlocked.Increment(ref writeErrors);
          if (failuresSoFar >= WriteErrorsLimit)
          {
              errorMessage = string.Format("Index '{0}' failed {1} times to write data to a disk. The index priority was set to Error.", PublicName, failuresSoFar);
          }
      }

      if (errorMessage == null)
          return;

      // nothing more to do when the Error flag is already set on the priority
      if ((Priority & IndexingPriority.Error) == IndexingPriority.Error)
          return;

      using (context.TransactionalStorage.DisableBatchNesting())
      {
          try
          {
              context.Database.TransactionalStorage.Batch(accessor => accessor.Indexing.SetIndexPriority(indexId, IndexingPriority.Error));
              Priority = IndexingPriority.Error;

              var notification = new IndexChangeNotification
              {
                  Name = PublicName,
                  Type = IndexChangeTypes.IndexMarkedAsErrored
              };
              context.Database.Notifications.RaiseNotifications(notification);

              if (string.IsNullOrEmpty(errorMessage))
                  throw new ArgumentException("Error message has to be set");

              logIndexing.WarnException(errorMessage, e);
              context.AddError(indexId, PublicName, null, e, errorMessage);

              var alert = new Alert
              {
                  AlertLevel = AlertLevel.Error,
                  CreatedAt = SystemTime.UtcNow,
                  Message = errorMessage,
                  Title = string.Format("Index '{0}' marked as errored due to corruption", PublicName),
                  UniqueKey = string.Format("Index '{0}' errored, dbid: {1}", PublicName, context.Database.TransactionalStorage.Id),
              };
              context.Database.AddAlert(alert);
          }
          catch (Exception ex)
          {
              // a failure while marking the index is only logged, never propagated
              logIndexing.WarnException(string.Format("Failed to handle corrupted {0} index", PublicName), ex);
          }
      }
  }
  /// <summary>
  /// Atomically resets the <c>writeErrors</c> counter (the one incremented by
  /// <c>HandleWriteError</c> and compared against <c>WriteErrorsLimit</c>) back to zero.
  /// </summary>
  private void ResetWriteErrors()
  {
      // Interlocked.Exchange returns the value the field held BEFORE the swap.
      // Assigning that result back to the field would immediately restore the old
      // count and undo the reset, so the return value is deliberately discarded.
      Interlocked.Exchange(ref writeErrors, 0);
  }
  /// <summary>
  /// Equality comparer that treats two <see cref="Index"/> instances as equal when their
  /// <c>IndexId</c> values are equal. Null arguments are handled instead of throwing.
  /// </summary>
  internal class IndexByIdEqualityComparer : IEqualityComparer<Index>
  {
      /// <summary>
      /// Returns <c>true</c> for the same reference (including two nulls), <c>false</c> when
      /// exactly one side is null, and otherwise compares the <c>IndexId</c> values.
      /// </summary>
      public bool Equals(Index x, Index y)
      {
          if (ReferenceEquals(x, y))
              return true;

          if (ReferenceEquals(x, null) || ReferenceEquals(y, null))
              return false;

          return x.IndexId == y.IndexId;
      }

      /// <summary>
      /// Hash code derived from <c>IndexId</c>; 0 for a null instance so that it stays
      /// consistent with <see cref="Equals(Index, Index)"/> treating two nulls as equal.
      /// </summary>
      public int GetHashCode(Index obj)
      {
          if (ReferenceEquals(obj, null))
              return 0;

          return obj.IndexId.GetHashCode();
      }
  }
  /// <summary>
  /// Low-memory handler: if the write lock can be taken without waiting, flushes the index
  /// to disk (when an index writer exists) and then recreates the searcher. When the lock
  /// is held by someone else the call returns immediately without doing anything.
  /// </summary>
  public void HandleLowMemory()
  {
      var lockTaken = false;
      try
      {
          // The ref-bool overload sets the flag as part of the acquisition itself, so the
          // finally block always knows whether the lock must be released, even if an
          // exception is raised right after the lock was obtained.
          Monitor.TryEnter(writeLock, ref lockTaken);
          if (lockTaken == false)
              return;

          try
          {
              // if in memory, flush to disk
              if (indexWriter != null)
              {
                  ForceWriteToDisk();
                  WriteInMemoryIndexToDiskIfNecessary(GetLastEtagFromStats());
              }
          }
          catch (Exception e)
          {
              // a failed flush is logged; the searcher is still recreated below
              logIndexing.ErrorException("Error while writing in memory index to disk.", e);
          }

          RecreateSearcher();
      }
      finally
      {
          if (lockTaken)
              Monitor.Exit(writeLock);
      }
  }
  /// <summary>
  /// Soft memory-release hook. The body is empty: this type performs no work here.
  /// </summary>
  public void SoftMemoryRelease()
  {
      // no-op
  }
  /// <summary>
  /// Builds the low-memory statistics entry for this index.
  /// </summary>
  /// <returns>
  /// A <c>LowMemoryHandlerStatistics</c> named "Index" carrying the database name, the index's
  /// public name as metadata, and the RAM usage estimated for the current index writer.
  /// </returns>
  public LowMemoryHandlerStatistics GetStats()
  {
      var estimator = new RamUsageEstimator(false);

      var stats = new LowMemoryHandlerStatistics();
      stats.Name = "Index";
      stats.DatabaseName = context.DatabaseName;
      stats.Metadata = new
      {
          IndexName = PublicName
      };
      stats.EstimatedUsedMemory = estimator.EstimateRamUsage(indexWriter);
      return stats;
  }
  1846. }
  1847. }