PageRenderTime 60ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 1ms

/ToMigrate/Raven.Database/Indexing/Index.cs

http://github.com/ayende/ravendb
C# | 2107 lines | 1791 code | 260 blank | 56 comment | 292 complexity | ea98209eeedfce2c87d8653c618d85cd MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0

Large files are truncated, but you can click here to view the full file

  1. //-----------------------------------------------------------------------
  2. // <copyright file="Index.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections;
  8. using System.Collections.Concurrent;
  9. using System.Collections.Generic;
  10. using System.Collections.Specialized;
  11. using System.ComponentModel.Composition;
  12. using System.Diagnostics;
  13. using System.IO;
  14. using System.Linq;
  15. using System.Text;
  16. using System.Threading;
  17. using System.Threading.Tasks;
  18. using System.Web.UI;
  19. using Lucene.Net.Analysis;
  20. using Lucene.Net.Analysis.Standard;
  21. using Lucene.Net.Documents;
  22. using Lucene.Net.Index;
  23. using Lucene.Net.Search;
  24. using Lucene.Net.Search.Vectorhighlight;
  25. using Lucene.Net.Store;
  26. using Lucene.Net.Util;
  27. using Raven.Abstractions;
  28. using Raven.Abstractions.Data;
  29. using Raven.Abstractions.Exceptions;
  30. using Raven.Abstractions.Extensions;
  31. using Raven.Abstractions.Indexing;
  32. using Raven.Abstractions.Json.Linq;
  33. using Raven.Abstractions.Linq;
  34. using Raven.Abstractions.Logging;
  35. using Raven.Abstractions.MEF;
  36. using Raven.Database.Config;
  37. using Raven.Database.Config.Settings;
  38. using Raven.Database.Data;
  39. using Raven.Database.Extensions;
  40. using Raven.Database.Indexing.Analyzers;
  41. using Raven.Database.Linq;
  42. using Raven.Database.Plugins;
  43. using Raven.Database.Storage;
  44. using Raven.Database.Tasks;
  45. using Raven.Database.Util;
  46. using Raven.Json.Linq;
  47. using Constants = Raven.Abstractions.Data.Constants;
  48. using Directory = Lucene.Net.Store.Directory;
  49. using Document = Lucene.Net.Documents.Document;
  50. using Field = Lucene.Net.Documents.Field;
  51. using Version = Lucene.Net.Util.Version;
  52. namespace Raven.Database.Indexing
  53. {
  54. /// <summary>
  55. /// This is a thread safe, single instance for a particular index.
  56. /// </summary>
  57. public abstract class Index : IDisposable, ILowMemoryHandler
  58. {
  59. protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  60. protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");
  61. private const long WriteErrorsLimit = 10;
  62. private readonly List<Document> currentlyIndexDocuments = new List<Document>();
  63. protected Directory directory;
  64. protected readonly IndexDefinition indexDefinition;
  65. private volatile string waitReason;
  66. private readonly Size flushSize;
  67. private long writeErrors;
  68. // Users sometimes configure index outputs without realizing that we need to count on that for memory
  69. // management. That can result in very small batch sizes, so we want to make sure that we don't trust
  70. // the user configuration, and use what is actually going on
  71. private int maxActualIndexOutput = 1;
  72. public IndexingPriority Priority { get; set; }
  73. /// <summary>
  74. /// Note, this might be written to by multiple threads at the same time
  75. /// We don't actually care for exact timing, it is more about general feeling
  76. /// </summary>
  77. private DateTime? lastQueryTime;
  78. private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
  79. new ConcurrentDictionary<string, IIndexExtension>();
  80. internal readonly int indexId;
  81. public int IndexId
  82. {
  83. get { return indexId; }
  84. }
  85. private readonly AbstractViewGenerator viewGenerator;
  86. protected readonly WorkContext context;
  87. private readonly object writeLock = new object();
  88. private volatile bool disposed;
  89. private RavenIndexWriter indexWriter;
  90. private SnapshotDeletionPolicy snapshotter;
  91. private readonly IndexSearcherHolder currentIndexSearcherHolder;
  92. private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing = new ConcurrentDictionary<string, IndexingPerformanceStats>();
  93. private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats = new ConcurrentQueue<IndexingPerformanceStats>();
  94. private readonly static StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);
  95. private bool forceWriteToDisk;
  /// <summary>
  /// Stores the directory, definition, view generator and work context of the index, reads the flush
  /// threshold from the configuration, records the creation time, opens the initial searcher and
  /// registers this instance as a low memory handler.
  /// </summary>
  /// <param name="directory">Lucene directory backing the index.</param>
  /// <param name="id">Numeric identifier of the index.</param>
  /// <param name="indexDefinition">Definition of the index (name, analyzers, field indexing options).</param>
  /// <param name="viewGenerator">View generator associated with the index.</param>
  /// <param name="context">Work context used for configuration, error reporting and storage access.</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="directory"/>, <paramref name="indexDefinition"/>, <paramref name="viewGenerator"/>
  /// or <paramref name="context"/> is null.
  /// </exception>
  [CLSCompliant(false)]
  protected Index(Directory directory, int id, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
  {
      // Validate every argument before any state is created. The context is dereferenced below
      // (context.Configuration), so a null context is rejected explicitly instead of surfacing
      // later as a NullReferenceException.
      if (directory == null) throw new ArgumentNullException("directory");
      if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
      if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");
      if (context == null) throw new ArgumentNullException("context");

      currentIndexSearcherHolder = new IndexSearcherHolder(id, context);
      this.indexId = id;
      this.indexDefinition = indexDefinition;
      this.viewGenerator = viewGenerator;
      this.context = context;
      if (logIndexing.IsDebugEnabled)
          logIndexing.Debug("Creating index for {0}", PublicName);
      this.directory = directory;
      flushSize = context.Configuration.Indexing.FlushIndexToDiskSize;
      _indexCreationTime = SystemTime.UtcNow;
      // No writer exists yet, so this opens a searcher directly over the directory.
      RecreateSearcher();
      MemoryStatistics.RegisterLowMemoryHandler(this);
  }
  115. public int CurrentNumberOfItemsToIndexInSingleBatch { get; set; }
  116. [ImportMany]
  117. public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }
  118. /// <summary>
  119. /// Whether this is a map reduce index or not
  120. /// </summary>
  121. public abstract bool IsMapReduce { get; }
  /// <summary>
  /// Current value of the last-query timestamp field; null while that field has not been assigned.
  /// </summary>
  public DateTime? LastQueryTime
  {
      get { return lastQueryTime; }
  }
  129. public DateTime LastIndexTime { get; set; }
  130. protected DateTime PreviousIndexTime { get; set; }
  /// <summary>
  /// Textual description of whether the index lives in a <c>RAMDirectory</c>:
  /// "true (size)" when it does, "false" when it does not or when the directory is already closed.
  /// </summary>
  public string IsOnRam
  {
      get
      {
          var inMemoryDirectory = directory as RAMDirectory;
          if (inMemoryDirectory != null)
          {
              try
              {
                  return "true (" + SizeHelper.Humane(inMemoryDirectory.SizeInBytes()) + ")";
              }
              catch (AlreadyClosedException)
              {
                  // the directory was closed underneath us; report it as not being in memory
              }
          }
          return "false";
      }
  }
  /// <summary>
  /// Name of the index, taken from its definition.
  /// </summary>
  public string PublicName
  {
      get
      {
          return indexDefinition.Name;
      }
  }
  /// <summary>
  /// Mirrors the <c>IsTestIndex</c> flag of the index definition.
  /// </summary>
  public bool IsTestIndex { get { return indexDefinition.IsTestIndex; } }
  /// <summary>
  /// The tracked maximum number of index outputs, or null while the tracked value is still
  /// at its initial value of 1.
  /// </summary>
  public int? MaxIndexOutputsPerDocument
  {
      get
      {
          return maxActualIndexOutput == 1 ? (int?)null : maxActualIndexOutput;
      }
  }
  162. [CLSCompliant(false)]
  163. public volatile bool IsMapIndexingInProgress;
  164. private DateTime _indexCreationTime;
  /// <summary>
  /// Creates the performance record for a batch that is about to run and registers it as the
  /// in-flight record for <paramref name="indexingStep"/>, replacing any previous one for that step.
  /// </summary>
  /// <param name="indexingStep">Key under which the in-flight record is tracked; also the initial Operation value.</param>
  /// <param name="operation">Operation name used to find the most recent completed batch (case-insensitive).</param>
  /// <param name="itemsCount">Number of items in the batch.</param>
  /// <returns>The newly created record.</returns>
  protected IndexingPerformanceStats RecordCurrentBatch(string indexingStep, string operation, int itemsCount)
  {
      var batchStats = new IndexingPerformanceStats();
      batchStats.ItemsCount = itemsCount;
      batchStats.Operation = indexingStep;
      batchStats.Started = SystemTime.UtcNow;
      batchStats.Operations = new BasePerformanceStats[0];

      // find the most recently enqueued completed batch of the same operation
      IndexingPerformanceStats previousBatch = null;
      foreach (var completedBatch in indexingPerformanceStats)
      {
          if (completedBatch.Operation.Equals(operation, StringComparison.OrdinalIgnoreCase))
              previousBatch = completedBatch;
      }

      if (previousBatch != null)
          batchStats.WaitingTimeSinceLastBatchCompleted = batchStats.Started - previousBatch.Completed;

      currentlyIndexing.AddOrUpdate(indexingStep, batchStats, (step, existing) => batchStats);
      return batchStats;
  }
  /// <summary>
  /// Finalizes the in-flight record registered for <paramref name="indexingStep"/> and moves it to the
  /// list of recent performance statistics. Does nothing when no record is registered for that step.
  /// </summary>
  /// <param name="indexingStep">Key the in-flight record was registered under.</param>
  /// <param name="operation">Operation name stored on the completed record.</param>
  /// <param name="inputCount">Number of inputs processed.</param>
  /// <param name="outputCount">Number of outputs produced.</param>
  /// <param name="operationStats">Per-operation statistics copied onto the record.</param>
  protected void BatchCompleted(string indexingStep, string operation, int inputCount, int outputCount, List<BasePerformanceStats> operationStats)
  {
      IndexingPerformanceStats finishedBatch;
      if (currentlyIndexing.TryRemove(indexingStep, out finishedBatch) == false)
          return;

      finishedBatch.Completed = SystemTime.UtcNow;
      finishedBatch.Duration = finishedBatch.Completed - finishedBatch.Started;
      finishedBatch.Operation = operation;
      finishedBatch.InputCount = inputCount;
      finishedBatch.OutputCount = outputCount;
      finishedBatch.Operations = operationStats.ToArray();

      AddIndexingPerformanceStats(finishedBatch);
  }
  /// <summary>
  /// Appends a record to the recent performance statistics and trims the queue from the oldest end
  /// until it holds no more than 25 entries.
  /// </summary>
  /// <param name="stats">Record to append.</param>
  public void AddIndexingPerformanceStats(IndexingPerformanceStats stats)
  {
      indexingPerformanceStats.Enqueue(stats);

      IndexingPerformanceStats evicted;
      while (indexingPerformanceStats.Count > 25)
      {
          indexingPerformanceStats.TryDequeue(out evicted);
      }
  }
  /// <summary>
  /// Shuts the index down under the write lock: marks it disposed, disposes the index extensions,
  /// clears the current searcher (waiting up to 5 seconds for it), attempts to persist an in-memory
  /// index to disk, then closes the writer's analyzer, the writer and the directory.
  /// Failures in the individual shutdown steps are logged and do not abort the remaining steps.
  /// </summary>
  public void Dispose()
  {
      // Tracks whether this call actually owns writeLock, so that the finally block never calls
      // Monitor.Exit on a lock that was not acquired (which would throw and hide the real error).
      var lockTaken = false;
      try
      {
          // this is here so we can give good logs in the case of a long shutdown process
          Monitor.TryEnter(writeLock, 100, ref lockTaken);
          if (lockTaken == false)
          {
              var localReason = waitReason;
              if (localReason != null)
                  logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
                      localReason, PublicName);

              Monitor.Enter(writeLock, ref lockTaken);
          }

          disposed = true;

          foreach (var indexExtension in indexExtensions)
          {
              indexExtension.Value.Dispose();
          }

          if (currentIndexSearcherHolder != null)
          {
              var item = currentIndexSearcherHolder.SetIndexSearcher(null, PublicName, wait: true);
              if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
              {
                  logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
              }
          }

          try
          {
              // make sure a writer exists, then force an in-memory index to be written to disk
              EnsureIndexWriter();
              ForceWriteToDisk();
              WriteInMemoryIndexToDiskIfNecessary(GetLastEtagFromStats());
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error while writing in memory index to disk.", e);
          }

          if (indexWriter != null) // just in case, WriteInMemoryIndexToDiskIfNecessary recreates writer
          {
              var writer = indexWriter;
              indexWriter = null;

              try
              {
                  writer.Analyzer.Close();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
              }

              try
              {
                  writer.Dispose();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error when closing the index", e);
              }
          }

          try
          {
              directory.Dispose();
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error when closing the directory", e);
          }
      }
      finally
      {
          if (lockTaken)
              Monitor.Exit(writeLock);
      }
  }
  /// <summary>
  /// Creates the index writer when none exists yet.
  /// </summary>
  /// <exception cref="IOException">Creating the writer failed; the message names the index.</exception>
  public void EnsureIndexWriter()
  {
      if (indexWriter != null)
          return;

      try
      {
          CreateIndexWriter();
      }
      catch (IOException e)
      {
          throw new IOException(
              string.Format("Error when trying to create the index writer for index '{0}'.", this.PublicName),
              e);
      }
  }
  /// <summary>
  /// Commits pending changes of the index writer under the write lock.
  /// Returns without doing anything when the index is disposed, has no writer, or the work context
  /// has no index storage.
  /// </summary>
  /// <param name="highestETag">Etag passed to the writer's commit.</param>
  /// <exception cref="IOException">The commit failed; the original exception is the inner exception.</exception>
  public void Flush(Etag highestETag)
  {
      try
      {
          lock (writeLock)
          {
              if (disposed)
                  return;
              if (indexWriter == null)
                  return;
              if (context.IndexStorage == null)
                  return;

              waitReason = "Flush";
              try
              {
                  // A failure here propagates to the outer catch, which reports it exactly once.
                  // Previously an inner catch also called HandleWriteError before rethrowing, so a
                  // single failed commit was reported twice.
                  indexWriter.Commit(highestETag);
                  ResetWriteErrors();
              }
              finally
              {
                  waitReason = null;
              }
          }
      }
      catch (Exception e)
      {
          HandleWriteError(e);
          throw new IOException("Error during flush for " + PublicName, e);
      }
  }
  /// <summary>
  /// Optimizes (merges the segments of) the index under the write lock, creating the writer first
  /// if needed. A failure is reported through HandleWriteError and rethrown; on success the write
  /// error state is reset.
  /// </summary>
  public void MergeSegments()
  {
      lock (writeLock)
      {
          waitReason = "Merge / Optimize";
          try
          {
              logIndexing.Info("Starting merge of {0}", PublicName);
              var mergeTimer = Stopwatch.StartNew();

              EnsureIndexWriter();
              try
              {
                  indexWriter.Optimize();
              }
              catch (Exception optimizeFailure)
              {
                  HandleWriteError(optimizeFailure);
                  throw;
              }

              logIndexing.Info("Done merging {0} - took {1}", indexId, mergeTimer.Elapsed);
              ResetWriteErrors();
          }
          finally
          {
              // always clear the wait reason, even when the merge failed
              waitReason = null;
          }
      }
  }
  /// <summary>
  /// Implemented by concrete index types. No implementation or caller is visible in this part of the file.
  /// NOTE(review): presumably indexes the documents of <paramref name="batch"/> with the given view generator
  /// and returns the statistics of that run - confirm against the implementations.
  /// </summary>
  350. public abstract IndexingPerformanceStats IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp, CancellationToken token);
  /// <summary>
  /// Builds a query result from a Lucene document: its score, its document id field, and - when a
  /// projection or all stored fields were requested - a projection built from the document's fields.
  /// </summary>
  /// <param name="document">Lucene document that matched the query.</param>
  /// <param name="fieldsToFetch">Describes which fields the caller asked for.</param>
  /// <param name="score">Score entry of the match.</param>
  /// <returns>The query result; its Projection is null when no projection was requested.</returns>
  protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
  {
      var result = new IndexQueryResult();
      result.Score = score.Score;
      result.Key = document.Get(Constants.DocumentIdFieldName);

      if (fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields)
          result.Projection = CreateDocumentFromFields(document, fieldsToFetch);
      else
          result.Projection = null;

      return result;
  }
  /// <summary>
  /// Creates a JSON object from the requested fields of a Lucene document. When all stored fields
  /// were requested, the names of every field in the document are added to the requested names.
  /// </summary>
  /// <param name="document">Lucene document to read the field values from.</param>
  /// <param name="fieldsToFetch">Requested field names and the fetch-all flag.</param>
  /// <returns>A new object holding the extracted values.</returns>
  public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
  {
      var projection = new RavenJObject();

      var requestedNames = fieldsToFetch.Fields;
      if (fieldsToFetch.FetchAllStoredFields)
      {
          var storedNames = document.GetFields().Select(x => x.Name);
          requestedNames = requestedNames.Concat(storedNames);
      }

      // the set removes duplicate names before the values are copied
      var uniqueNames = new HashSet<string>(requestedNames);
      AddFieldsToDocument(document, uniqueNames, projection);
      return projection;
  }
  /// <summary>
  /// Copies the values of the named fields from a Lucene document into <paramref name="documentFromFields"/>.
  /// Names ending in "_IsArray", "_Range" or "_ConvertToJson" are skipped. A field becomes an array
  /// in the target when it occurs more than once or when the document has a matching "_IsArray" field.
  /// </summary>
  /// <param name="document">Lucene document to read from.</param>
  /// <param name="fieldNames">Names of the fields to copy.</param>
  /// <param name="documentFromFields">Target object that receives the values.</param>
  protected static void AddFieldsToDocument(Document document, HashSet<string> fieldNames, RavenJObject documentFromFields)
  {
      foreach (var fldName in fieldNames)
      {
          // The suffixes are fixed internal markers, so they are compared ordinally rather than
          // with the culture-sensitive default of String.EndsWith.
          if (fldName.EndsWith("_IsArray", StringComparison.Ordinal) ||
              fldName.EndsWith("_Range", StringComparison.Ordinal) ||
              fldName.EndsWith("_ConvertToJson", StringComparison.Ordinal))
              continue;

          // the marker lookup does not depend on the individual field, so it is done once per name
          var hasArrayMarker = document.GetField(fldName + "_IsArray") != null;

          foreach (var field in document.GetFields(fldName))
          {
              var val = CreateProperty(field, document);

              RavenJToken existingToken;
              var alreadyPresent = documentFromFields.TryGetValue(field.Name, out existingToken);
              if (alreadyPresent || hasArrayMarker)
              {
                  var array = existingToken as RavenJArray;
                  if (array == null)
                  {
                      // promote an existing scalar to an array, or start an empty one
                      documentFromFields[field.Name] = array =
                          (alreadyPresent ? new RavenJArray { existingToken } : new RavenJArray());
                  }
                  array.Add(val);
              }
              else
              {
                  documentFromFields[field.Name] = val;
              }
          }
      }
  }
  /// <summary>
  /// Notifies the batchers about every index entry matching <paramref name="term"/> that is about to be
  /// deleted. Nothing is searched when no batcher requires the document. Errors raised by a trigger
  /// are logged as warnings and recorded on the work context.
  /// </summary>
  /// <param name="batchers">Index update trigger batchers to notify.</param>
  /// <param name="term">Term identifying the entries; its text is used as the key.</param>
  protected void InvokeOnIndexEntryDeletedOnAllBatchers(List<AbstractIndexUpdateTriggerBatcher> batchers, Term term)
  {
      var documentRequired = false;
      foreach (var batcher in batchers)
      {
          if (batcher.RequiresDocumentOnIndexEntryDeleted)
          {
              documentRequired = true;
              break;
          }
      }
      if (documentRequired == false)
          return;

      // find all documents
      var key = term.Text;

      IndexSearcher searcher = null;
      using (GetSearcher(out searcher))
      {
          var collector = new GatherAllCollector();
          searcher.Search(new TermQuery(term), collector);
          var matches = collector.ToTopDocs().ScoreDocs;

          foreach (var match in matches)
          {
              var document = searcher.Doc(match.Doc);
              batchers.ApplyAndIgnoreAllErrors(
                  exception =>
                  {
                      var message = string.Format(
                          "Error when executed OnIndexEntryDeleted trigger for index '{0}', key: '{1}'",
                          PublicName, key);
                      logIndexing.WarnException(message, exception);
                      context.AddError(indexId, PublicName, key, exception, "OnIndexEntryDeleted Trigger");
                  },
                  trigger => trigger.OnIndexEntryDeleted(key, document));
          }
      }
  }
  /// <summary>
  /// Converts a Lucene field to a JSON token: binary fields yield their binary value, fields with a
  /// matching "_ConvertToJson" field are parsed as a JSON object, and all others yield their string
  /// value with the null and empty-string placeholder constants translated back.
  /// </summary>
  /// <param name="fld">Field to convert.</param>
  /// <param name="document">Document the field belongs to; used to look up the "_ConvertToJson" marker.</param>
  /// <returns>The converted token.</returns>
  private static RavenJToken CreateProperty(Field fld, Document document)
  {
      if (fld.IsBinary)
          return fld.GetBinaryValue();

      var rawText = fld.StringValue;

      var convertToJsonMarker = document.GetField(fld.Name + "_ConvertToJson");
      if (convertToJsonMarker != null)
          return RavenJToken.Parse(fld.StringValue) as RavenJObject;

      string translated;
      if (rawText == Constants.NullValue)
          translated = null;
      else if (rawText == Constants.EmptyString)
          translated = string.Empty;
      else
          translated = rawText;

      return translated;
  }
  /// <summary>
  /// Runs a write action against the index under the write lock and the on-directory
  /// 'writing-to-index' lock, notifies the index extensions, flushes to disk when required,
  /// handles commit points after a flush and recreates the searcher when documents changed.
  /// </summary>
  /// <param name="action">
  /// The write action; receives the index writer, the search analyzer and a stats object, and returns
  /// information about the indexed items.
  /// </param>
  /// <param name="writePerformanceStats">
  /// Optional list that receives timing entries (extensions, flush to disk, searcher recreation).
  /// When null, no timings are collected.
  /// </param>
  /// <exception cref="ObjectDisposedException">The index has already been disposed.</exception>
  /// <exception cref="InvalidOperationException">
  /// Any failure during the write; the original exception is the inner exception.
  /// </exception>
  protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action, List<PerformanceStats> writePerformanceStats = null)
  {
      if (disposed)
          throw new ObjectDisposedException("Index " + PublicName + " has been disposed");

      // The stopwatches stay null when no statistics were requested.
      // NOTE(review): StopwatchScope.For is then called with null - presumably it tolerates that; confirm.
      Stopwatch extensionExecutionDuration = null;
      Stopwatch flushToDiskDuration = null;
      Stopwatch recreateSearcherDuration = null;

      if (writePerformanceStats != null)
      {
          extensionExecutionDuration = new Stopwatch();
          flushToDiskDuration = new Stopwatch();
          recreateSearcherDuration = new Stopwatch();
      }

      PreviousIndexTime = LastIndexTime;
      LastIndexTime = SystemTime.UtcNow;

      lock (writeLock)
      {
          bool shouldRecreateSearcher;
          var toDispose = new List<Action>();
          Analyzer searchAnalyzer = null;
          var itemsInfo = new IndexedItemsInfo(null);
          bool flushed = false;

          try
          {
              waitReason = "Write";
              try
              {
                  searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
              }
              catch (Exception e)
              {
                  context.AddError(indexId, indexDefinition.Name, "Creating Analyzer", e, "Analyzer");
                  throw;
              }

              EnsureIndexWriter();

              var locker = directory.MakeLock("writing-to-index.lock");
              // Remember whether the lock was really obtained, so that a failed Obtain() does not
              // lead to releasing a lock that this call never held.
              var lockObtained = false;
              try
              {
                  var stats = new IndexingWorkStats();

                  try
                  {
                      lockObtained = locker.Obtain();
                      if (lockObtained == false)
                      {
                          throw new InvalidOperationException(
                              string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
                                  PublicName));
                      }

                      itemsInfo = action(indexWriter, searchAnalyzer, stats);
                      shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;
                      foreach (var indexExtension in indexExtensions.Values)
                      {
                          using (StopwatchScope.For(extensionExecutionDuration, resetBeforeStart: true))
                          {
                              indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
                          }

                          // only extensions whose name maps to an IndexingOperation value are reported
                          IndexingOperation operation;
                          if (writePerformanceStats != null && Enum.TryParse(string.Format("Extension_{0}", indexExtension.Name), out operation))
                          {
                              writePerformanceStats.Add(PerformanceStats.From(operation, extensionExecutionDuration.ElapsedMilliseconds));
                          }
                      }
                  }
                  catch (Exception e)
                  {
                      var invalidSpatialShapeException = e as InvalidSpatialShapException;
                      var invalidDocId = (invalidSpatialShapeException == null) ?
                          null :
                          invalidSpatialShapeException.InvalidDocumentId;
                      context.AddError(indexId, indexDefinition.Name, invalidDocId, e, "Write");
                      throw;
                  }

                  if (itemsInfo.ChangedDocs > 0)
                  {
                      using (StopwatchScope.For(flushToDiskDuration))
                      {
                          WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);

                          if (indexWriter != null && indexWriter.RamSize() >= flushSize)
                          {
                              Flush(itemsInfo.HighestETag); // just make sure changes are flushed to disk
                              flushed = true;
                          }
                      }

                      UpdateIndexingStats(context, stats);
                  }
              }
              finally
              {
                  if (lockObtained)
                      locker.Release();
              }
          }
          catch (Exception e)
          {
              throw new InvalidOperationException("Could not properly write to index " + PublicName, e);
          }
          finally
          {
              currentlyIndexDocuments.Clear();
              if (searchAnalyzer != null)
                  searchAnalyzer.Close();
              foreach (Action dispose in toDispose)
              {
                  dispose();
              }
              waitReason = null;
              LastIndexTime = SystemTime.UtcNow;
          }

          if (flushed)
          {
              try
              {
                  HandleCommitPoints(itemsInfo, GetCurrentSegmentsInfo());
              }
              catch (Exception e)
              {
                  logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
              }
          }

          if (shouldRecreateSearcher)
          {
              using (StopwatchScope.For(recreateSearcherDuration))
              {
                  RecreateSearcher();
              }
          }
      }

      if (writePerformanceStats != null)
      {
          writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_FlushToDisk, flushToDiskDuration.ElapsedMilliseconds));
          writePerformanceStats.Add(PerformanceStats.From(IndexingOperation.Lucene_RecreateSearcher, recreateSearcherDuration.ElapsedMilliseconds));
      }
  }
  /// <summary>
  /// Returns the current segments information of the index, or null when the index is held in a
  /// <c>RAMDirectory</c>.
  /// </summary>
  private IndexSegmentsInfo GetCurrentSegmentsInfo()
  {
      var heldInMemory = directory is RAMDirectory;
      return heldInMemory
          ? null
          : IndexStorage.GetCurrentSegmentsInfo(indexDefinition.Name, directory);
  }
  /// <summary>
  /// Called by Write after the index was flushed during that write, with the information about the
  /// indexed items and the current segments information (null when the index is held in a RAMDirectory).
  /// Exceptions thrown from here are logged as warnings by Write and otherwise ignored.
  /// </summary>
  581. protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo, IndexSegmentsInfo segmentsInfo);
  /// <summary>
  /// Persists the map or reduce statistics of a batch. A write conflict is retried (after a short
  /// sleep) for up to ten attempts; if all attempts conflict the update is silently given up, since
  /// slightly outdated statistics are acceptable. Any other failure propagates to the caller.
  /// </summary>
  /// <param name="workContext">Context providing the transactional storage.</param>
  /// <param name="stats">Statistics to persist; its Operation selects map, reduce or no update.</param>
  /// <exception cref="ArgumentOutOfRangeException">The operation of <paramref name="stats"/> is not a known status.</exception>
  protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
  {
      const int maxAttempts = 10;

      for (var attempt = 0; attempt < maxAttempts; attempt++)
      {
          var hitWriteConflict = false;
          var operation = stats.Operation;

          if (operation == IndexingWorkStats.Status.Map)
          {
              workContext.TransactionalStorage.Batch(accessor =>
              {
                  try
                  {
                      accessor.Indexing.UpdateIndexingStats(indexId, stats);
                  }
                  catch (Exception e)
                  {
                      if (accessor.IsWriteConflict(e) == false)
                          throw;
                      hitWriteConflict = true;
                  }
              });
          }
          else if (operation == IndexingWorkStats.Status.Reduce)
          {
              workContext.TransactionalStorage.Batch(accessor =>
              {
                  try
                  {
                      accessor.Indexing.UpdateReduceStats(indexId, stats);
                  }
                  catch (Exception e)
                  {
                      if (accessor.IsWriteConflict(e) == false)
                          throw;
                      hitWriteConflict = true;
                  }
              });
          }
          else if (operation != IndexingWorkStats.Status.Ignore)
          {
              throw new ArgumentOutOfRangeException();
          }

          if (hitWriteConflict == false)
              return;

          // back off briefly before the next attempt
          Thread.Sleep(11);
      }
  }
  /// <summary>
  /// Creates a new snapshot deletion policy and a new index writer over the current directory,
  /// wiring in the configured index reader warmers when there are any.
  /// </summary>
  /// <exception cref="IOException">
  /// Creation failed; the failure is first reported through HandleWriteError and kept as the inner exception.
  /// </exception>
  private void CreateIndexWriter()
  {
      try
      {
          snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

          IndexWriter.IndexReaderWarmer warmer = null;
          if (context.IndexReaderWarmers != null)
              warmer = new IndexReaderWarmersWrapper(indexDefinition.Name, context.IndexReaderWarmers);

          indexWriter = new RavenIndexWriter(
              directory,
              stopAnalyzer,
              snapshotter,
              IndexWriter.MaxFieldLength.UNLIMITED,
              context.Configuration.Indexing.MaxWritesBeforeRecreate,
              warmer);
      }
      catch (Exception creationFailure)
      {
          HandleWriteError(creationFailure);
          throw new IOException("Failure to create index writer for " + PublicName, creationFailure);
      }
  }
  /// <summary>
  /// Moves an index that is held in a RAMDirectory to a physical directory when a write to disk was
  /// forced, the in-memory index exceeds the configured size, the index is up to date enough, or it
  /// is older than the configured in-memory time. Does nothing when the database runs in memory,
  /// when the index definition storage is not available, or when the writer's directory is not a RAMDirectory.
  /// </summary>
  /// <param name="highestETag">Etag committed with the writer and used for the up-to-date check.</param>
  internal void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
  {
      if (context.Configuration.Core.RunInMemory)
          return;
      if (context.IndexDefinitionStorage == null) // may happen during index startup
          return;

      var ramDirectory = indexWriter.Directory as RAMDirectory;
      if (ramDirectory == null)
          return;

      var upToDateEnough = IsUpToDateEnoughToWriteToDisk(highestETag);
      var tooBig = new Size(ramDirectory.SizeInBytes(), SizeUnit.Bytes) >= context.Configuration.Indexing.NewIndexInMemoryMaxSize;
      var tooOld = (SystemTime.UtcNow - _indexCreationTime) > context.Configuration.Indexing.NewIndexInMemoryMaxTime.AsTimeSpan;

      var shouldPersist = forceWriteToDisk || tooBig || upToDateEnough || tooOld;
      if (shouldPersist == false)
          return;

      indexWriter.Commit(highestETag);
      var physicalDirectory = context.IndexStorage.MakeRAMDirectoryPhysical(ramDirectory, indexDefinition);
      IndexStorage.WriteIndexVersion(physicalDirectory, indexDefinition);
      directory = physicalDirectory;

      // the old writer and the RAM directory are no longer needed; a new writer is created over the new directory
      indexWriter.Dispose(true);
      ramDirectory.Dispose();

      CreateIndexWriter();

      ResetWriteErrors();
  }
  /// <summary>
  /// Consulted by WriteInMemoryIndexToDiskIfNecessary: a result of true is one of the conditions that
  /// cause an in-memory index to be written to disk. How "up to date enough" is decided is left to
  /// the concrete index types.
  /// </summary>
  676. protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
  /// <summary>
  /// Builds the per-field analyzer for this index from its definition. The default analyzer is
  /// replaced by the analyzer configured for all fields, if any; every configured analyzer is added
  /// for its field; fields marked NotAnalyzed get a keyword analyzer and fields marked Analyzed
  /// (without a configured analyzer) get the standard analyzer.
  /// </summary>
  /// <param name="defaultAnalyzer">Analyzer used for fields without a specific analyzer.</param>
  /// <param name="toDispose">Receives the Close action of every analyzer involved, including the default one.</param>
  /// <param name="forQuerying">When true, configured analyzers marked with NotForQueryingAttribute are not added.</param>
  /// <returns>The per-field analyzer wrapper.</returns>
  public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
  {
      toDispose.Add(defaultAnalyzer.Close);

      string allFieldsAnalyzerName;
      if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out allFieldsAnalyzerName))
      {
          defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, allFieldsAnalyzerName);
          toDispose.Add(defaultAnalyzer.Close);
      }

      var wrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);

      foreach (var configured in indexDefinition.Analyzers)
      {
          Analyzer configuredInstance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
          // registered for disposal even when it ends up not being used for querying
          toDispose.Add(configuredInstance.Close);

          var excludedFromQuerying = forQuerying &&
              configuredInstance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false).Length > 0;
          if (excludedFromQuerying)
              continue;

          wrapper.AddAnalyzer(configured.Key, configuredInstance);
      }

      // created lazily and shared between all fields that need them
      StandardAnalyzer standardAnalyzer = null;
      KeywordAnalyzer keywordAnalyzer = null;

      foreach (var fieldIndexing in indexDefinition.Indexes)
      {
          if (fieldIndexing.Value == FieldIndexing.NotAnalyzed)
          {
              if (keywordAnalyzer == null)
              {
                  keywordAnalyzer = new KeywordAnalyzer();
                  toDispose.Add(keywordAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, keywordAnalyzer);
          }
          else if (fieldIndexing.Value == FieldIndexing.Analyzed)
          {
              // an explicitly configured analyzer takes precedence
              if (indexDefinition.Analyzers.ContainsKey(fieldIndexing.Key))
                  continue;

              if (standardAnalyzer == null)
              {
                  standardAnalyzer = new RavenStandardAnalyzer(Version.LUCENE_29);
                  toDispose.Add(standardAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldIndexing.Key, standardAnalyzer);
          }
      }

      return wrapper;
  }
  /// <summary>
  /// Convenience overload of RobustEnumerationIndex for callers that do not need the error callback.
  /// </summary>
  /// <param name="input">Items to run the indexing functions over.</param>
  /// <param name="funcs">Indexing functions to execute.</param>
  /// <param name="stats">Statistics updated with attempts and errors.</param>
  /// <param name="linqExecutionDuration">Stopwatch handed to the enumerator as its MoveNextDuration.</param>
  /// <returns>The results of the robust enumeration.</returns>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
  {
      Action<Exception, object> unusedErrorHandler;
      var results = RobustEnumerationIndex(input, funcs, stats, out unusedErrorHandler, linqExecutionDuration);
      return results;
  }
  /// <summary>
  /// Runs the indexing functions over the input through a RobustEnumerator. Each move increments the
  /// indexing attempts (a cancelled move decrements them again); each error is recorded on the work
  /// context as a "Map" error, logged as a warning and counted in the indexing errors.
  /// </summary>
  /// <param name="input">Items to run the indexing functions over.</param>
  /// <param name="funcs">Indexing functions to execute.</param>
  /// <param name="stats">Statistics updated with attempts and errors.</param>
  /// <param name="onErrorFunc">Receives the error callback that is also passed to the enumerator.</param>
  /// <param name="linqExecutionDuration">Stopwatch handed to the enumerator as its MoveNextDuration.</param>
  /// <returns>The results of the robust enumeration.</returns>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats, out Action<Exception, object> onErrorFunc, Stopwatch linqExecutionDuration)
  {
      onErrorFunc = (exception, o) =>
      {
          // an invalid spatial shape carries the id of the offending document itself
          var spatialFailure = exception as InvalidSpatialShapException;
          string docId = spatialFailure != null ? spatialFailure.InvalidDocumentId : null;

          context.AddError(indexId, indexDefinition.Name, docId ?? TryGetDocKey(o), exception, "Map");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, TryGetDocKey(o));
          logIndexing.WarnException(message, exception);

          stats.IndexingErrors++;
      };

      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          beforeMoveNext: () => Interlocked.Increment(ref stats.IndexingAttempts),
          cancelMoveNext: () => Interlocked.Decrement(ref stats.IndexingAttempts),
          onError: onErrorFunc);
      robustEnumerator.MoveNextDuration = linqExecutionDuration;

      return robustEnumerator.RobustEnumeration(input, funcs);
  }
  /// <summary>
  /// Runs the reduce function over the input through a RobustEnumerator. Each move increments the
  /// reduce attempts (a cancelled move decrements them again); each error is recorded on the work
  /// context as a "Reduce" error, logged as a warning and counted in the reduce errors.
  /// </summary>
  /// <param name="input">Items to reduce.</param>
  /// <param name="func">Reduce function to execute.</param>
  /// <param name="stats">Statistics updated with attempts and errors.</param>
  /// <param name="linqExecutionDuration">Stopwatch handed to the enumerator as its MoveNextDuration.</param>
  /// <returns>The results of the robust enumeration.</returns>
  protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func, IndexingWorkStats stats, Stopwatch linqExecutionDuration)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          beforeMoveNext: () => Interlocked.Increment(ref stats.ReduceAttempts),
          cancelMoveNext: () => Interlocked.Decrement(ref stats.ReduceAttempts),
          onError: (exception, o) =>
          {
              var key = TryGetDocKey(o);

              context.AddError(indexId, indexDefinition.Name, key, exception, "Reduce");

              var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, key);
              logIndexing.WarnException(message, exception);

              stats.ReduceErrors++;
          });
      robustEnumerator.MoveNextDuration = linqExecutionDuration;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  // Map/reduce statistics are deliberately not tracked here, because reducing during
  // the map phase is merely an optimization step.
  /// <summary>
  /// Runs the reduce function over the input through a RobustEnumerator without updating any
  /// statistics. Each error is recorded on the work context as a "Reduce" error and logged as a
  /// warning, identified by the document keys joined with ';'.
  /// </summary>
  /// <param name="input">Items to reduce; also re-read to collect keys when the failing item has none.</param>
  /// <param name="func">Reduce function to execute.</param>
  /// <param name="reduceDuringMapLinqExecution">Stopwatch handed to the enumerator as its MoveNextDuration.</param>
  /// <returns>The results of the robust enumeration.</returns>
  protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func, Stopwatch reduceDuringMapLinqExecution)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.Core.MaxNumberOfItemsToProcessInSingleBatch,
          onError: (exception, o) =>
          {
              var concatenatedKeys = string.Join(";", TryGetDocKeys(input, o));

              context.AddError(indexId, indexDefinition.Name, concatenatedKeys, exception, "Reduce");

              var message = String.Format("Failed to execute indexing function on {0} on {1}", indexDefinition.Name, concatenatedKeys);
              logIndexing.WarnException(message, exception);
          });
      robustEnumerator.MoveNextDuration = reduceDuringMapLinqExecution;

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Collects the document keys relevant to an item: the key of <paramref name="current"/> when it
  /// has one, otherwise the distinct non-empty keys of every item of <paramref name="input"/>
  /// (which is reset and enumerated from the start).
  /// </summary>
  /// <param name="input">Enumerator to fall back to when the current item has no key.</param>
  /// <param name="current">Item whose key is preferred.</param>
  /// <returns>The collected keys; may be empty.</returns>
  private static IEnumerable<string> TryGetDocKeys(IEnumerator<object> input, object current)
  {
      var collectedKeys = new HashSet<string>();

      var currentKey = TryGetDocKey(current);
      if (string.IsNullOrEmpty(currentKey) == false)
      {
          collectedKeys.Add(currentKey);
          return collectedKeys;
      }

      input.Reset();
      while (input.MoveNext())
      {
          var candidateKey = TryGetDocKey(input.Current);
          if (string.IsNullOrEmpty(candidateKey) == false)
              collectedKeys.Add(candidateKey);
      }

      return collectedKeys;
  }
  /// <summary>
  /// Extracts the document id - or, failing that, the reduce key - from a dynamic JSON object.
  /// </summary>
  /// <param name="current">Object to inspect.</param>
  /// <returns>
  /// The string form of the value found, or null when <paramref name="current"/> is not a
  /// DynamicJsonObject or holds neither value.
  /// </returns>
  public static string TryGetDocKey(object current)
  {
      var json = current as DynamicJsonObject;
      if (json == null)
          return null;

      object keyValue = json.GetValue(Constants.DocumentIdFieldName);
      if (keyValue == null)
          keyValue = json.GetValue(Constants.ReduceKeyFieldName);

      return keyValue == null ? null : keyValue.ToString();
  }
  /// <summary>
  /// Implemented by concrete index types. No implementation or caller is visible in this part of the file.
  /// NOTE(review): presumably removes the index entries belonging to <paramref name="keys"/> - confirm
  /// against the implementations.
  /// </summary>
  835. public abstract void Remove(string[] keys, WorkContext context);
  /// <summary>
  /// Returns the current holding state of this index's searcher holder.
  /// </summary>
  internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
  {
      var currentState = currentIndexSearcherHolder.GetCurrentStateHolder();
      return currentState;
  }
  /// <summary>
  /// Obtains the current searcher from the searcher holder.
  /// </summary>
  /// <param name="searcher">Receives the searcher.</param>
  /// <returns>The disposable handed out by the holder; callers wrap it in a using block.</returns>
  internal IDisposable GetSearcher(out IndexSearcher searcher)
  {
      var searcherScope = currentIndexSearcherHolder.GetSearcher(out searcher);
      return searcherScope;
  }
  /// <summary>
  /// Obtains the current searcher together with the terms documents from the searcher holder.
  /// </summary>
  /// <param name="searcher">Receives the searcher.</param>
  /// <param name="termsDocs">Receives the terms documents.</param>
  /// <returns>The disposable handed out by the holder.</returns>
  internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
  {
      var searcherScope = currentIndexSearcherHolder.GetSearcherAndTermDocs(out searcher, out termsDocs);
      return searcherScope;
  }
  /// <summary>
  /// Installs a fresh searcher in the searcher holder: over the writer's reader when a writer
  /// exists, otherwise directly over the directory (opened read-only).
  /// </summary>
  private void RecreateSearcher()
  {
      IndexSearcher freshSearcher;
      if (indexWriter != null)
      {
          var writerReader = indexWriter.GetReader();
          freshSearcher = new IndexSearcher(writerReader);
      }
      else
      {
          freshSearcher = new IndexSearcher(directory, true);
      }

      currentIndexSearcherHolder.SetIndexSearcher(freshSearcher, PublicName, wait: false);
  }
  /// <summary>
  /// Adds a Lucene document to the index writer, letting every registered analyzer generator
  /// substitute the analyzer used for this particular document.
  /// </summary>
  /// <param name="currentIndexWriter">The writer that receives the document.</param>
  /// <param name="luceneDoc">The document to add.</param>
  /// <param name="analyzer">The base analyzer; it is owned by the caller and never closed here.</param>
  /// <remarks>
  /// When index extensions are registered, a clone of the document is also recorded in
  /// <c>currentlyIndexDocuments</c>. Reader-valued fields are disposed and any analyzer created by
  /// the generators is closed even when cloning or adding the document throws.
  /// </remarks>
  protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
  {
      Analyzer newAnalyzer = AnalyzerGenerators.Aggregate(analyzer,
          (currentAnalyzer, generator) =>
          {
              Analyzer generateAnalyzer =
                  generator.Value.GenerateAnalyzerForIndexing(PublicName, luceneDoc, currentAnalyzer);

              // An intermediate analyzer that a later generator replaced is no longer reachable,
              // so close it here; the caller-owned base analyzer is left alone.
              if (generateAnalyzer != currentAnalyzer &&
                  currentAnalyzer != analyzer)
                  currentAnalyzer.Close();

              return generateAnalyzer;
          });

      try
      {
          if (indexExtensions.Count > 0)
              currentlyIndexDocuments.Add(CloneDocument(luceneDoc));

          currentIndexWriter.AddDocument(luceneDoc, newAnalyzer);
      }
      finally
      {
          try
          {
              // Dispose all the readers. Done in finally so they are released on failure as well.
              foreach (var fieldable in luceneDoc.GetFields())
              {
                  using (fieldable.ReaderValue)
                  {
                  }
              }
          }
          finally
          {
              // Nested finally: the generated analyzer must be closed even if a reader fails to dispose.
              if (newAnalyzer != analyzer)
                  newAnalyzer.Close();
          }
      }
  }
  /// <summary>
  /// Records the current UTC time (from <c>SystemTime.UtcNow</c>) as the last time this index was queried.
  /// </summary>
  public void MarkQueried()
  {
      var now = SystemTime.UtcNow;
      lastQueryTime = now;
  }
  /// <summary>
  /// Records <paramref name="time"/> as the last query time, unless a last query time
  /// that is equal or later has already been recorded.
  /// </summary>
  /// <param name="time">The time at which the index was queried.</param>
  public void MarkQueried(DateTime time)
  {
      // Only move the timestamp forward; equal or older values are ignored.
      if (lastQueryTime == null || lastQueryTime.Value < time)
          lastQueryTime = time;
  }
  /// <summary>
  /// Looks up the index extension registered under the exact key.
  /// </summary>
  /// <param name="indexExtensionKey">The key the extension was registered with.</param>
  /// <returns>The registered extension, or <c>null</c> when nothing is registered under that key.</returns>
  public IIndexExtension GetExtension(string indexExtensionKey)
  {
      IIndexExtension registered;
      var found = indexExtensions.TryGetValue(indexExtensionKey, out registered);
      return found ? registered : null;
  }
  /// <summary>
  /// Returns the first registered index extension whose key starts with the given prefix.
  /// </summary>
  /// <param name="indexExtensionKeyPrefix">The key prefix to match.</param>
  /// <returns>
  /// The first matching extension in the enumeration order of <c>indexExtensions</c>,
  /// or <c>null</c> when no key matches.
  /// </returns>
  public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
  {
      // Extension keys are identifiers, not linguistic text: compare ordinally so the
      // match does not depend on the current thread culture.
      return indexExtensions
          .FirstOrDefault(x => x.Key.StartsWith(indexExtensionKeyPrefix, StringComparison.Ordinal))
          .Value;
  }
  /// <summary>
  /// Registers an index extension under the given key via <c>indexExtensions.TryAdd</c>.
  /// </summary>
  /// <param name="indexExtensionKey">The key to register the extension under.</param>
  /// <param name="extension">The extension to register.</param>
  /// <remarks>
  /// The result of <c>TryAdd</c> is ignored, so a failed add is silent.
  /// NOTE(review): presumably an extension already registered under the same key is kept
  /// (ConcurrentDictionary semantics); confirm the type of <c>indexExtensions</c>.
  /// </remarks>
  public void SetExtension(string indexExtensionKey, IIndexExtension extension)
  {
      indexExtensions.TryAdd(indexExtensionKey, extension);
  }
  /// <summary>
  /// Produces a copy of a Lucene document containing its numeric, binary and string fields.
  /// </summary>
  /// <param name="luceneDoc">The document to copy.</param>
  /// <returns>
  /// A new document. Fields that are neither numeric, binary nor string-valued
  /// (probably token streams) are left out of the copy.
  /// </returns>
  private static Document CloneDocument(Document luceneDoc)
  {
      var copy = new Document();

      foreach (AbstractField source in luceneDoc.GetFields())
      {
          var store = source.IsStored ? Field.Store.YES : Field.Store.NO;

          var sourceNumeric = source as NumericField;
          if (sourceNumeric != null)
          {
              var numericCopy = new NumericField(sourceNumeric.Name, store, sourceNumeric.IsIndexed);

              // Re-apply the value through the setter matching its runtime type.
              var boxedValue = sourceNumeric.NumericValue;
              if (boxedValue is int)
                  numericCopy.SetIntValue((int)boxedValue);
              else if (boxedValue is long)
                  numericCopy.SetLongValue((long)boxedValue);
              else if (boxedValue is double)
                  numericCopy.SetDoubleValue((double)boxedValue);
              else if (boxedValue is float)
                  numericCopy.SetFloatValue((float)boxedValue);

              copy.Add(numericCopy);
              continue;
          }

          if (source.IsBinary)
          {
              copy.Add(new Field(source.Name, source.GetBinaryValue(), store));
              continue;
          }

          var text = source.StringValue;
          if (text == null)
          {
              // Probably a token stream; fields with token streams can't be handled, so skip it.
              continue;
          }

          var index = source.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS;
          var termVector = source.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO;
          copy.Add(new Field(source.Name, text, store, index, termVector));
      }

      return copy;
  }
  /// <summary>
  /// Writes a debug log entry listing the fields of a document produced by indexing.
  /// Does nothing when debug logging is disabled.
  /// </summary>
  /// <param name="key">The key of the item that was indexed.</param>
  /// <param name="luceneDoc">The resulting Lucene document.</param>
  /// <remarks>
  /// Each field is rendered on its own line as: tab, name, an "I"/"-" indexed flag,
  /// an "S"/"-" stored flag, and the string value (binary values are shown as "&lt;binary&gt;").
  /// </remarks>
  protected void LogIndexedDocument(string key, Document luceneDoc)
  {
      if (logIndexing.IsDebugEnabled == false)
          return;

      var dump = new StringBuilder();
      foreach (var field in luceneDoc.GetFields())
      {
          var name = field.Name;
          var shownValue = field.IsBinary ? "<binary>" : field.StringValue;
          var indexedFlag = field.IsIndexed ? "I" : "-";
          var storedFlag = field.IsStored ? "S" : "-";

          dump.Append("\t");
          dump.Append(name);
          dump.Append(" ");
          dump.Append(indexedFlag);
          dump.Append(storedFlag);
          dump.Append(": ");
          dump.Append(shownValue);
          dump.AppendLine();
      }

      if (logIndexing.IsDebugEnabled)
      {
          logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, PublicName,
              dump.ToString());
      }
  }
  /// <summary>
  /// Verifies that every field referenced by the query text and by the sort clauses is known
  /// to the view generator.
  /// </summary>
  /// <param name="indexQuery">The query whose <c>Query</c> text and <c>SortedFields</c> are checked.</param>
  /// <param name="viewGenerator">The view generator that knows which fields are indexed.</param>
  /// <exception cref="ArgumentException">
  /// A queried or sorted field is not indexed, or an alphanumeric sort does not name a field.
  /// </exception>
  /// <remarks>
  /// A trailing "_Range" suffix is stripped before the lookup. A view generator that contains
  /// the catch-all field "_" has dynamic field names and therefore accepts any field.
  /// Field names are identifiers, so all prefix/suffix checks are ordinal and do not depend
  /// on the current culture.
  /// </remarks>
  [CLSCompliant(false)]
  public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
  {
      const string rangeSuffix = "_Range";

      if (string.IsNullOrWhiteSpace(indexQuery.Query) == false)
      {
          HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery);
          foreach (string field in hashSet)
          {
              string f = field;
              if (f.EndsWith(rangeSuffix, StringComparison.Ordinal))
              {
                  f = f.Substring(0, f.Length - rangeSuffix.Length);
              }
              if (viewGenerator.ContainsField(f) == false &&
                  viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
                  throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed");
          }
      }

      if (indexQuery.SortedFields != null)
      {
          foreach (SortedField sortedField in indexQuery.SortedFields)
          {
              string field = sortedField.Field;
              if (field == Constants.TemporaryScoreValue)
                  continue;
              if (field.EndsWith(rangeSuffix, StringComparison.Ordinal))
              {
                  field = field.Substring(0, field.Length - rangeSuffix.Length);
              }

              // Random and custom sorts do not refer to an indexed field.
              if (field.StartsWith(Constants.RandomFieldName, StringComparison.Ordinal) ||
                  field.StartsWith(Constants.CustomSortFieldName, StringComparison.Ordinal))
                  continue;

              if (field.StartsWith(Constants.AlphaNumericFieldName, StringComparison.Ordinal))
              {
                  // Alphanumeric sorting wraps the real field name; unwrap it before the lookup.
                  field = SortFieldHelper.CustomField(field).Name;
                  if (string.IsNullOrEmpty(field))
                      throw new ArgumentException("Alpha numeric sorting requires a field name");
              }

              if (viewGenerator.ContainsField(field) == false &&
                  !field.StartsWith(Constants.DistanceFieldName, StringComparison.Ordinal) &&
                  viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
                  throw new ArgumentException("The field '" + field + "' is not indexed, cannot sort on fields that are not indexed");
          }
      }
  }
  1040. #region Nested type: IndexQueryOperation
  1041. public class IndexQueryOperation
  1042. {
  1043. FastVectorHighlighter highlighter;
  1044. FieldQuery fieldQuery;
  1045. private readonly Stopwatch _queryParseDuration = new Stopwatch();
  1046. private readonly IndexQuery indexQuery;
  1047. private readonly Index parent;
  1048. private readonly Func<IndexQueryResult, bool> shouldInclude

Large files are truncated, but you can click here to view the full file