PageRenderTime 55ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Indexing/Index.cs

https://github.com/kairogyn/ravendb
C# | 1548 lines | 1339 code | 163 blank | 46 comment | 217 complexity | ad04d231427ff9caa5215f992b237de0 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0

Large files are truncated, but you can click here to view the full file

  1. //-----------------------------------------------------------------------
  2. // <copyright file="Index.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections;
  8. using System.Collections.Concurrent;
  9. using System.Collections.Generic;
  10. using System.ComponentModel.Composition;
  11. using System.Diagnostics;
  12. using System.IO;
  13. using System.Linq;
  14. using System.Text;
  15. using System.Threading;
  16. using Lucene.Net.Analysis;
  17. using Lucene.Net.Analysis.Standard;
  18. using Lucene.Net.Documents;
  19. using Lucene.Net.Index;
  20. using Lucene.Net.Search;
  21. using Lucene.Net.Search.Vectorhighlight;
  22. using Lucene.Net.Store;
  23. using Raven.Abstractions;
  24. using Raven.Abstractions.Data;
  25. using Raven.Abstractions.Extensions;
  26. using Raven.Abstractions.Indexing;
  27. using Raven.Abstractions.Linq;
  28. using Raven.Abstractions.Logging;
  29. using Raven.Abstractions.MEF;
  30. using Raven.Database.Data;
  31. using Raven.Database.Extensions;
  32. using Raven.Database.Linq;
  33. using Raven.Database.Plugins;
  34. using Raven.Database.Server.Responders;
  35. using Raven.Database.Storage;
  36. using Raven.Database.Tasks;
  37. using Raven.Database.Util;
  38. using Raven.Json.Linq;
  39. using Directory = Lucene.Net.Store.Directory;
  40. using Document = Lucene.Net.Documents.Document;
  41. using Field = Lucene.Net.Documents.Field;
  42. using Version = Lucene.Net.Util.Version;
  43. namespace Raven.Database.Indexing
  44. {
  45. /// <summary>
  46. /// This is a thread safe, single instance for a particular index.
  47. /// </summary>
  48. public abstract class Index : IDisposable
  49. {
  50. protected static readonly ILog logIndexing = LogManager.GetLogger(typeof(Index).FullName + ".Indexing");
  51. protected static readonly ILog logQuerying = LogManager.GetLogger(typeof(Index).FullName + ".Querying");
  52. private readonly List<Document> currentlyIndexDocuments = new List<Document>();
  53. protected Directory directory;
  54. protected readonly IndexDefinition indexDefinition;
  55. private volatile string waitReason;
  56. public IndexingPriority Priority { get; set; }
  57. /// <summary>
  58. /// Note, this might be written to by multiple threads at the same time
  59. /// We don't actually care for exact timing, it is more about general feeling
  60. /// </summary>
  61. private DateTime? lastQueryTime;
  62. private readonly ConcurrentDictionary<string, IIndexExtension> indexExtensions =
  63. new ConcurrentDictionary<string, IIndexExtension>();
  64. internal readonly string name;
  65. private readonly AbstractViewGenerator viewGenerator;
  66. protected readonly WorkContext context;
  67. private readonly object writeLock = new object();
  68. private volatile bool disposed;
  69. private RavenIndexWriter indexWriter;
  70. private SnapshotDeletionPolicy snapshotter;
  71. private readonly IndexSearcherHolder currentIndexSearcherHolder;
  72. private readonly ConcurrentDictionary<string, IndexingPerformanceStats> currentlyIndexing = new ConcurrentDictionary<string, IndexingPerformanceStats>();
  73. private readonly ConcurrentQueue<IndexingPerformanceStats> indexingPerformanceStats = new ConcurrentQueue<IndexingPerformanceStats>();
  74. private readonly static StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_30);
  75. private bool forceWriteToDisk;
  /// <summary>
  /// Stores the collaborators of this index and opens the initial searcher over <paramref name="directory"/>.
  /// </summary>
  /// <param name="directory">Lucene directory holding the index data.</param>
  /// <param name="name">Name of the index; used in log messages and error reports.</param>
  /// <param name="indexDefinition">Definition (analyzers, field indexing options) of the index.</param>
  /// <param name="viewGenerator">Compiled view generator for the index.</param>
  /// <param name="context">Work context; it is not null-checked here.</param>
  /// <exception cref="ArgumentNullException">
  /// <paramref name="directory"/>, <paramref name="name"/>, <paramref name="indexDefinition"/>
  /// or <paramref name="viewGenerator"/> is null.
  /// </exception>
  protected Index(Directory directory, string name, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator, WorkContext context)
  {
      // Validate before any argument is used: previously `name` was handed to
      // IndexSearcherHolder before it had been checked for null.
      if (directory == null) throw new ArgumentNullException("directory");
      if (name == null) throw new ArgumentNullException("name");
      if (indexDefinition == null) throw new ArgumentNullException("indexDefinition");
      if (viewGenerator == null) throw new ArgumentNullException("viewGenerator");

      currentIndexSearcherHolder = new IndexSearcherHolder(name, context);

      this.name = name;
      this.indexDefinition = indexDefinition;
      this.viewGenerator = viewGenerator;
      this.context = context;
      logIndexing.Debug("Creating index for {0}", name);
      this.directory = directory;

      // No index writer exists yet, so this opens a searcher directly over the directory.
      RecreateSearcher();
  }
  91. [ImportMany]
  92. public OrderedPartCollection<AbstractAnalyzerGenerator> AnalyzerGenerators { get; set; }
  93. /// <summary>
  94. /// Whether this is a map reduce index or not
  95. /// </summary>
  96. public abstract bool IsMapReduce { get; }
  /// <summary>
  /// The timestamp currently held in the lastQueryTime field, which the MarkQueried overloads write.
  /// </summary>
  public DateTime? LastQueryTime
  {
      get { return lastQueryTime; }
  }
  104. public DateTime LastIndexTime { get; set; }
  105. protected DateTime PreviousIndexTime { get; set; }
  /// <summary>
  /// Describes whether the index currently lives in a <see cref="RAMDirectory"/>:
  /// "true (size)" when it does, "false" otherwise.
  /// </summary>
  public string IsOnRam
  {
      get
      {
          var inMemoryDirectory = directory as RAMDirectory;
          if (inMemoryDirectory != null)
          {
              try
              {
                  var humaneSize = DatabaseSize.Humane(inMemoryDirectory.SizeInBytes());
                  return "true (" + humaneSize + ")";
              }
              catch (AlreadyClosedException)
              {
                  // The directory is already closed; fall through and report "false".
              }
          }

          return "false";
      }
  }
  123. public volatile bool IsMapIndexingInProgress;
  /// <summary>
  /// Registers (or replaces) the in-progress stats entry for the given indexing step.
  /// </summary>
  /// <param name="indexingStep">Key of the step; also stored as the operation name.</param>
  /// <param name="size">Number of inputs in the batch.</param>
  protected void RecordCurrentBatch(string indexingStep, int size)
  {
      var batchStats = new IndexingPerformanceStats();
      batchStats.InputCount = size;
      batchStats.Operation = indexingStep;
      batchStats.Started = SystemTime.UtcNow;

      // Add when absent, overwrite when present.
      currentlyIndexing[indexingStep] = batchStats;
  }
  /// <summary>
  /// Removes the in-progress stats entry for the given indexing step, if there is one.
  /// </summary>
  /// <param name="indexingStep">Key previously passed to RecordCurrentBatch.</param>
  protected void BatchCompleted(string indexingStep)
  {
      IndexingPerformanceStats finishedBatch;
      currentlyIndexing.TryRemove(indexingStep, out finishedBatch);
  }
  /// <summary>
  /// Appends a stats record to the history queue and trims the queue back to at most 25 entries.
  /// </summary>
  /// <param name="stats">The record to append.</param>
  protected void AddindexingPerformanceStat(IndexingPerformanceStats stats)
  {
      const int maxRetainedStats = 25;

      indexingPerformanceStats.Enqueue(stats);

      IndexingPerformanceStats evicted;
      while (indexingPerformanceStats.Count > maxRetainedStats)
      {
          indexingPerformanceStats.TryDequeue(out evicted);
      }
  }
  /// <summary>
  /// Shuts the index down under the write lock: marks it disposed, disposes the registered
  /// extensions, detaches the current searcher, closes the writer (and its analyzer) and
  /// finally disposes the directory. Failures of individual steps are logged and do not
  /// prevent the remaining steps from running.
  /// </summary>
  public void Dispose()
  {
      // Tracks whether we really own writeLock, so the finally block never calls
      // Monitor.Exit on a lock that was not acquired.
      var lockTaken = false;
      try
      {
          // this is here so we can give good logs in the case of a long shutdown process
          Monitor.TryEnter(writeLock, 100, ref lockTaken);
          if (lockTaken == false)
          {
              var localReason = waitReason;
              if (localReason != null)
                  logIndexing.Warn("Waiting for {0} to complete before disposing of index {1}, that might take a while if the server is very busy",
                      localReason, name);

              Monitor.Enter(writeLock, ref lockTaken);
          }

          disposed = true;

          foreach (var indexExtension in indexExtensions)
          {
              // A faulty extension must not stop the writer and the directory from being released.
              try
              {
                  indexExtension.Value.Dispose();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error when disposing the index extension " + indexExtension.Key, e);
              }
          }

          if (currentIndexSearcherHolder != null)
          {
              var item = currentIndexSearcherHolder.SetIndexSearcher(null, wait: true);
              if (item.WaitOne(TimeSpan.FromSeconds(5)) == false)
              {
                  logIndexing.Warn("After closing the index searching, we waited for 5 seconds for the searching to be done, but it wasn't. Continuing with normal shutdown anyway.");
              }
          }

          if (indexWriter != null)
          {
              var writer = indexWriter;
              indexWriter = null;

              try
              {
                  writer.Analyzer.Close();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error while closing the index (closing the analyzer failed)", e);
              }

              try
              {
                  writer.Dispose();
              }
              catch (Exception e)
              {
                  logIndexing.ErrorException("Error when closing the index", e);
              }
          }

          try
          {
              directory.Dispose();
          }
          catch (Exception e)
          {
              logIndexing.ErrorException("Error when closing the directory", e);
          }
      }
      finally
      {
          if (lockTaken)
              Monitor.Exit(writeLock);
      }
  }
  /// <summary>
  /// Commits the index writer under the write lock. Does nothing when the index
  /// has been disposed or no writer has been created yet.
  /// </summary>
  public void Flush()
  {
      lock (writeLock)
      {
          if (disposed || indexWriter == null)
              return;

          // Published so Dispose() can report what it is waiting for.
          waitReason = "Flush";
          try
          {
              indexWriter.Commit();
          }
          finally
          {
              waitReason = null;
          }
      }
  }
  /// <summary>
  /// Runs a Lucene optimize (segment merge) on the index under the write lock and logs how long it took.
  /// Does nothing once the index has been disposed. If no writer exists yet it is created first,
  /// the same way Write does, instead of failing with a NullReferenceException.
  /// </summary>
  public void MergeSegments()
  {
      lock (writeLock)
      {
          // Same guard as Flush(): after Dispose() the writer and directory are gone.
          if (disposed)
              return;

          waitReason = "Merge / Optimize";
          try
          {
              // The writer is created lazily on first write; make sure there is one to optimize.
              if (indexWriter == null)
                  CreateIndexWriter();

              logIndexing.Info("Starting merge of {0}", name);
              var sp = Stopwatch.StartNew();
              indexWriter.Optimize();
              logIndexing.Info("Done merging {0} - took {1}", name, sp.Elapsed);
          }
          finally
          {
              waitReason = null;
          }
      }
  }
  243. public abstract void IndexDocuments(AbstractViewGenerator viewGenerator, IndexingBatch batch, IStorageActionsAccessor actions, DateTime minimumTimestamp);
  /// <summary>
  /// Builds the query result for a single Lucene hit.
  /// </summary>
  /// <param name="document">The Lucene document that matched.</param>
  /// <param name="fieldsToFetch">Decides whether a projection is built from the stored fields.</param>
  /// <param name="score">Score information for the hit.</param>
  /// <returns>
  /// A result carrying the score and document id; Projection is populated only for
  /// projection queries or when all stored fields were requested, otherwise it is null.
  /// </returns>
  protected virtual IndexQueryResult RetrieveDocument(Document document, FieldsToFetch fieldsToFetch, ScoreDoc score)
  {
      var result = new IndexQueryResult();
      result.Score = score.Score;
      result.Key = document.Get(Constants.DocumentIdFieldName);

      var needsProjection = fieldsToFetch.IsProjection || fieldsToFetch.FetchAllStoredFields;
      if (needsProjection)
          result.Projection = CreateDocumentFromFields(document, fieldsToFetch);
      else
          result.Projection = null;

      return result;
  }
  /// <summary>
  /// Builds a JSON object from the fields of a Lucene document.
  /// </summary>
  /// <param name="document">The Lucene document to read fields from.</param>
  /// <param name="fieldsToFetch">
  /// The requested field names; when FetchAllStoredFields is set, every field name of the document is added.
  /// </param>
  /// <returns>
  /// One property per field name. Marker fields (names ending in "_IsArray", "_Range" or "_ConvertToJson")
  /// are skipped. A name becomes an array when it has several values or a matching "_IsArray" field exists.
  /// </returns>
  public static RavenJObject CreateDocumentFromFields(Document document, FieldsToFetch fieldsToFetch)
  {
      var documentFromFields = new RavenJObject();
      var fields = fieldsToFetch.Fields;
      if (fieldsToFetch.FetchAllStoredFields)
          fields = fields.Concat(document.GetFields().Select(x => x.Name));

      var q = fields
          .Distinct()
          .SelectMany(name => document.GetFields(name) ?? new Field[0])
          .Where(x => x != null)
          .Where(
              x =>
              // Field-name suffixes are internal identifiers, so compare them ordinally
              // rather than with the current culture's linguistic rules.
              x.Name.EndsWith("_IsArray", StringComparison.Ordinal) == false &&
              x.Name.EndsWith("_Range", StringComparison.Ordinal) == false &&
              x.Name.EndsWith("_ConvertToJson", StringComparison.Ordinal) == false)
          .Select(fld => CreateProperty(fld, document))
          .GroupBy(x => x.Key)
          .Select(g =>
          {
              // A single value stays scalar unless the document explicitly marks the field as an array.
              if (g.Count() == 1 && document.GetField(g.Key + "_IsArray") == null)
              {
                  return g.First();
              }
              var ravenJTokens = g.Select(x => x.Value).ToArray();
              return new KeyValuePair<string, RavenJToken>(g.Key, new RavenJArray((IEnumerable)ravenJTokens));
          });

      foreach (var keyValuePair in q)
      {
          documentFromFields.Add(keyValuePair.Key, keyValuePair.Value);
      }
      return documentFromFields;
  }
  /// <summary>
  /// Converts a single Lucene field into a (name, JSON token) pair.
  /// Binary fields yield their binary value; fields accompanied by a "_ConvertToJson" marker field
  /// are parsed as JSON; otherwise the string value is used, with the null and empty-string
  /// placeholder constants translated back to null and the empty string.
  /// </summary>
  /// <param name="fld">The field to convert.</param>
  /// <param name="document">The owning document, consulted for the "_ConvertToJson" marker.</param>
  private static KeyValuePair<string, RavenJToken> CreateProperty(Field fld, Document document)
  {
      var fieldName = fld.Name;

      if (fld.IsBinary)
          return new KeyValuePair<string, RavenJToken>(fieldName, fld.GetBinaryValue());

      var text = fld.StringValue;

      var storedAsJson = document.GetField(fieldName + "_ConvertToJson") != null;
      if (storedAsJson)
      {
          var parsed = RavenJToken.Parse(text) as RavenJObject;
          return new KeyValuePair<string, RavenJToken>(fieldName, parsed);
      }

      if (text == Constants.NullValue)
          text = null;
      if (text == Constants.EmptyString)
          text = string.Empty;

      return new KeyValuePair<string, RavenJToken>(fieldName, text);
  }
  /// <summary>
  /// Runs an indexing action against the index writer while holding the write lock and the
  /// directory-level "writing-to-index.lock".
  /// </summary>
  /// <param name="action">
  /// Receives the index writer, the analyzer created for this write and a fresh stats object;
  /// returns information about the items it indexed.
  /// </param>
  /// <remarks>
  /// When the action reports changed documents, the indexing stats are persisted, an in-memory
  /// index may be moved to disk, the writer is flushed and the searcher is recreated.
  /// Commit point handling failures are logged and ignored.
  /// </remarks>
  /// <exception cref="ObjectDisposedException">The index has been disposed.</exception>
  /// <exception cref="InvalidOperationException">Any failure while writing; the original exception is the inner exception.</exception>
  protected void Write(Func<RavenIndexWriter, Analyzer, IndexingWorkStats, IndexedItemsInfo> action)
  {
      if (disposed)
          throw new ObjectDisposedException("Index " + name + " has been disposed");

      PreviousIndexTime = LastIndexTime;
      LastIndexTime = SystemTime.UtcNow;

      lock (writeLock)
      {
          // Dispose() sets `disposed` while holding this lock. A writer that passed the check
          // above and then waited behind Dispose() must not continue on a disposed index.
          if (disposed)
              throw new ObjectDisposedException("Index " + name + " has been disposed");

          bool shouldRecreateSearcher;
          var toDispose = new List<Action>();
          Analyzer searchAnalyzer = null;
          var itemsInfo = new IndexedItemsInfo();

          try
          {
              waitReason = "Write";
              try
              {
                  searchAnalyzer = CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose);
              }
              catch (Exception e)
              {
                  context.AddError(name, "Creating Analyzer", e.ToString(), "Analyzer");
                  throw;
              }

              // The writer is created lazily on the first write.
              if (indexWriter == null)
              {
                  CreateIndexWriter();
              }

              var locker = directory.MakeLock("writing-to-index.lock");
              try
              {
                  var stats = new IndexingWorkStats();

                  try
                  {
                      if (locker.Obtain() == false)
                      {
                          throw new InvalidOperationException(
                              string.Format("Could not obtain the 'writing-to-index' lock of '{0}' index",
                                            name));
                      }

                      itemsInfo = action(indexWriter, searchAnalyzer, stats);
                      shouldRecreateSearcher = itemsInfo.ChangedDocs > 0;
                      foreach (var indexExtension in indexExtensions.Values)
                      {
                          indexExtension.OnDocumentsIndexed(currentlyIndexDocuments, searchAnalyzer);
                      }
                  }
                  catch (Exception e)
                  {
                      context.AddError(name, null, e.ToString(), "Write");
                      throw;
                  }

                  if (itemsInfo.ChangedDocs > 0)
                  {
                      UpdateIndexingStats(context, stats);
                      WriteInMemoryIndexToDiskIfNecessary(itemsInfo.HighestETag);
                      Flush(); // just make sure changes are flushed to disk
                  }
              }
              finally
              {
                  locker.Release();
              }
          }
          catch (Exception e)
          {
              throw new InvalidOperationException("Could not properly write to index " + name, e);
          }
          finally
          {
              currentlyIndexDocuments.Clear();
              if (searchAnalyzer != null)
                  searchAnalyzer.Close();
              foreach (Action dispose in toDispose)
              {
                  dispose();
              }
              waitReason = null;
              LastIndexTime = SystemTime.UtcNow;
          }

          try
          {
              HandleCommitPoints(itemsInfo);
          }
          catch (Exception e)
          {
              logIndexing.WarnException("Could not handle commit point properly, ignoring", e);
          }

          if (shouldRecreateSearcher)
              RecreateSearcher();
      }
  }
  393. protected abstract void HandleCommitPoints(IndexedItemsInfo itemsInfo);
  /// <summary>
  /// Persists the given work stats through the transactional storage, choosing the
  /// map or reduce counters according to <c>stats.Operation</c>. Stats marked Ignore are not persisted.
  /// </summary>
  /// <param name="workContext">Provides the transactional storage to write to.</param>
  /// <param name="stats">The stats to persist.</param>
  /// <exception cref="ArgumentOutOfRangeException">The operation is not Map, Reduce or Ignore.</exception>
  protected void UpdateIndexingStats(WorkContext workContext, IndexingWorkStats stats)
  {
      var operation = stats.Operation;

      if (operation == IndexingWorkStats.Status.Ignore)
          return;

      if (operation == IndexingWorkStats.Status.Map)
      {
          workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateIndexingStats(name, stats));
          return;
      }

      if (operation == IndexingWorkStats.Status.Reduce)
      {
          workContext.TransactionalStorage.Batch(accessor => accessor.Indexing.UpdateReduceStats(name, stats));
          return;
      }

      throw new ArgumentOutOfRangeException();
  }
  /// <summary>
  /// Creates a new snapshot deletion policy and a new index writer over the current directory.
  /// A reader warmer wrapper is supplied only when the context provides index reader warmers.
  /// </summary>
  private void CreateIndexWriter()
  {
      snapshotter = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

      IndexWriter.IndexReaderWarmer warmer = null;
      if (context.IndexReaderWarmers != null)
          warmer = new IndexReaderWarmersWrapper(name, context.IndexReaderWarmers);

      indexWriter = new RavenIndexWriter(
          directory,
          stopAnalyzer,
          snapshotter,
          IndexWriter.MaxFieldLength.UNLIMITED,
          context.Configuration.MaxIndexWritesBeforeRecreate,
          warmer);
  }
  /// <summary>
  /// Replaces a RAM-backed index directory with the directory returned by
  /// <c>IndexStorage.MakeRAMDirectoryPhysical</c> when a write to disk was forced, the RAM directory
  /// reached the configured size limit, or the index is up to date enough. Does nothing when
  /// running in memory, during startup (no index definition storage yet) or when the writer's
  /// directory is not a RAM directory.
  /// </summary>
  /// <param name="highestETag">Highest etag indexed so far; passed to the up-to-date check.</param>
  private void WriteInMemoryIndexToDiskIfNecessary(Etag highestETag)
  {
      if (context.Configuration.RunInMemory)
          return;
      if (context.IndexDefinitionStorage == null) // may happen during index startup
          return;

      var ramDirectory = indexWriter.Directory as RAMDirectory;
      if (ramDirectory == null)
          return;

      var upToDate = IsUpToDateEnoughToWriteToDisk(highestETag);
      var tooBig = ramDirectory.SizeInBytes() >= context.Configuration.NewIndexInMemoryMaxBytes;

      var shouldPersist = forceWriteToDisk || tooBig || upToDate;
      if (shouldPersist == false)
          return;

      indexWriter.Commit();
      var physicalDirectory = context.IndexStorage.MakeRAMDirectoryPhysical(ramDirectory, indexDefinition.Name);
      IndexStorage.WriteIndexVersion(physicalDirectory, indexDefinition);
      directory = physicalDirectory;

      // Retire the writer and the RAM directory, then open a new writer on the new directory.
      indexWriter.Dispose(true);
      ramDirectory.Dispose();

      CreateIndexWriter();
  }
  439. protected abstract bool IsUpToDateEnoughToWriteToDisk(Etag highestETag);
  /// <summary>
  /// Builds the per-field analyzer wrapper for this index from the index definition.
  /// </summary>
  /// <param name="defaultAnalyzer">
  /// Analyzer used for fields without a specific one; replaced when the definition
  /// configures an analyzer for the all-fields key.
  /// </param>
  /// <param name="toDispose">
  /// Receives a Close callback for every analyzer created here (and for the supplied default);
  /// the caller is expected to invoke them when done.
  /// </param>
  /// <param name="forQuerying">
  /// When true, analyzers whose type carries NotForQueryingAttribute are not registered.
  /// </param>
  /// <returns>The configured per-field analyzer wrapper.</returns>
  public RavenPerFieldAnalyzerWrapper CreateAnalyzer(Analyzer defaultAnalyzer, ICollection<Action> toDispose, bool forQuerying = false)
  {
      toDispose.Add(defaultAnalyzer.Close);

      string allFieldsAnalyzerName;
      if (indexDefinition.Analyzers.TryGetValue(Constants.AllFields, out allFieldsAnalyzerName))
      {
          defaultAnalyzer = IndexingExtensions.CreateAnalyzerInstance(Constants.AllFields, allFieldsAnalyzerName);
          toDispose.Add(defaultAnalyzer.Close);
      }

      var wrapper = new RavenPerFieldAnalyzerWrapper(defaultAnalyzer);

      // Explicitly configured analyzers.
      foreach (var configured in indexDefinition.Analyzers)
      {
          Analyzer instance = IndexingExtensions.CreateAnalyzerInstance(configured.Key, configured.Value);
          toDispose.Add(instance.Close);

          if (forQuerying)
          {
              var markers = instance.GetType().GetCustomAttributes(typeof(NotForQueryingAttribute), false);
              if (markers.Length > 0)
                  continue;
          }

          wrapper.AddAnalyzer(configured.Key, instance);
      }

      // Fields that only specify an indexing mode share one keyword / standard analyzer each.
      StandardAnalyzer sharedStandardAnalyzer = null;
      KeywordAnalyzer sharedKeywordAnalyzer = null;
      foreach (var fieldIndexing in indexDefinition.Indexes)
      {
          var fieldName = fieldIndexing.Key;

          if (fieldIndexing.Value == FieldIndexing.NotAnalyzed)
          {
              if (sharedKeywordAnalyzer == null)
              {
                  sharedKeywordAnalyzer = new KeywordAnalyzer();
                  toDispose.Add(sharedKeywordAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldName, sharedKeywordAnalyzer);
          }
          else if (fieldIndexing.Value == FieldIndexing.Analyzed)
          {
              // An explicitly configured analyzer takes precedence.
              if (indexDefinition.Analyzers.ContainsKey(fieldName))
                  continue;

              if (sharedStandardAnalyzer == null)
              {
                  sharedStandardAnalyzer = new StandardAnalyzer(Version.LUCENE_29);
                  toDispose.Add(sharedStandardAnalyzer.Close);
              }
              wrapper.AddAnalyzer(fieldName, sharedStandardAnalyzer);
          }
      }

      return wrapper;
  }
  /// <summary>
  /// Runs the map functions over the input through a RobustEnumerator. Each attempt is counted in
  /// <c>stats.IndexingAttempts</c>; failures are reported to the work context as "Map" errors,
  /// logged, and counted in <c>stats.IndexingErrors</c>.
  /// </summary>
  /// <param name="input">The items to index.</param>
  /// <param name="funcs">The indexing functions to apply.</param>
  /// <param name="stats">Counters updated while enumerating.</param>
  protected IEnumerable<object> RobustEnumerationIndex(IEnumerator<object> input, List<IndexingFunc> funcs, IndexingWorkStats stats)
  {
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch);

      robustEnumerator.BeforeMoveNext = () => Interlocked.Increment(ref stats.IndexingAttempts);
      robustEnumerator.CancelMoveNext = () => Interlocked.Decrement(ref stats.IndexingAttempts);
      robustEnumerator.OnError = (exception, o) =>
      {
          var docKey = TryGetDocKey(o);
          context.AddError(name, docKey, exception.Message, "Map");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", name, docKey);
          logIndexing.WarnException(message, exception);

          stats.IndexingErrors++;
      };

      return robustEnumerator.RobustEnumeration(input, funcs);
  }
  /// <summary>
  /// Runs the reduce function over the input through a RobustEnumerator. Each attempt is counted in
  /// <c>stats.ReduceAttempts</c>; failures are reported to the work context as "Reduce" errors,
  /// logged, and counted in <c>stats.ReduceErrors</c>.
  /// </summary>
  /// <param name="input">The items to reduce.</param>
  /// <param name="func">The reduce function to apply.</param>
  /// <param name="actions">Not used by this method.</param>
  /// <param name="stats">Counters updated while enumerating.</param>
  protected IEnumerable<object> RobustEnumerationReduce(IEnumerator<object> input, IndexingFunc func,
      IStorageActionsAccessor actions,
      IndexingWorkStats stats)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch);

      robustEnumerator.BeforeMoveNext = () => Interlocked.Increment(ref stats.ReduceAttempts);
      robustEnumerator.CancelMoveNext = () => Interlocked.Decrement(ref stats.ReduceAttempts);
      robustEnumerator.OnError = (exception, o) =>
      {
          var docKey = TryGetDocKey(o);
          context.AddError(name, docKey, exception.Message, "Reduce");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", name, docKey);
          logIndexing.WarnException(message, exception);

          stats.ReduceErrors++;
      };

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Runs the reduce function over the input through a RobustEnumerator without touching any
  /// attempt/error counters: reducing during the map phase is only an optimization step, so
  /// map/reduce stats are not tracked here. Failures are still reported as "Reduce" errors and logged.
  /// </summary>
  /// <param name="input">The items to reduce.</param>
  /// <param name="func">The reduce function to apply.</param>
  protected IEnumerable<object> RobustEnumerationReduceDuringMapPhase(IEnumerator<object> input, IndexingFunc func)
  {
      // not strictly accurate, but if we get that many errors, probably an error anyway.
      var robustEnumerator = new RobustEnumerator(context.CancellationToken, context.Configuration.MaxNumberOfItemsToIndexInSingleBatch);

      robustEnumerator.BeforeMoveNext = () => { }; // don't care
      robustEnumerator.CancelMoveNext = () => { }; // don't care
      robustEnumerator.OnError = (exception, o) =>
      {
          var docKey = TryGetDocKey(o);
          context.AddError(name, docKey, exception.Message, "Reduce");

          var message = String.Format("Failed to execute indexing function on {0} on {1}", name, docKey);
          logIndexing.WarnException(message, exception);
      };

      return robustEnumerator.RobustEnumeration(input, func);
  }
  /// <summary>
  /// Extracts the document id from an indexed item, if possible.
  /// </summary>
  /// <param name="current">The item; only DynamicJsonObject instances are inspected.</param>
  /// <returns>
  /// The string form of the document id field, or null when the item is not a
  /// DynamicJsonObject or has no value for that field.
  /// </returns>
  public static string TryGetDocKey(object current)
  {
      var jsonObject = current as DynamicJsonObject;
      if (jsonObject != null)
      {
          object id = jsonObject.GetValue(Constants.DocumentIdFieldName);
          if (id != null)
              return id.ToString();
      }

      return null;
  }
  568. public abstract void Remove(string[] keys, WorkContext context);
  /// <summary>
  /// Returns the current holding state of this index's searcher holder.
  /// </summary>
  internal IndexSearcherHolder.IndexSearcherHoldingState GetCurrentStateHolder()
  {
      var holdingState = currentIndexSearcherHolder.GetCurrentStateHolder();
      return holdingState;
  }
  /// <summary>
  /// Obtains the current index searcher from the searcher holder.
  /// </summary>
  /// <param name="searcher">Receives the current searcher.</param>
  /// <returns>The disposable handed back by the holder; callers in this file dispose it with <c>using</c>.</returns>
  internal IDisposable GetSearcher(out IndexSearcher searcher)
  {
      var searcherLease = currentIndexSearcherHolder.GetSearcher(out searcher);
      return searcherLease;
  }
  /// <summary>
  /// Obtains the current index searcher together with the per-document term objects from the searcher holder.
  /// </summary>
  /// <param name="searcher">Receives the current searcher.</param>
  /// <param name="termsDocs">Receives the term documents, indexed by Lucene document number in IndexEntries.</param>
  /// <returns>The disposable handed back by the holder; callers in this file dispose it with <c>using</c>.</returns>
  internal IDisposable GetSearcherAndTermsDocs(out IndexSearcher searcher, out RavenJObject[] termsDocs)
  {
      var searcherLease = currentIndexSearcherHolder.GetSearcherAndTermDocs(out searcher, out termsDocs);
      return searcherLease;
  }
  /// <summary>
  /// Installs a fresh searcher in the searcher holder: over the writer's reader when a writer
  /// exists, otherwise directly over the directory.
  /// </summary>
  private void RecreateSearcher()
  {
      IndexSearcher freshSearcher;
      if (indexWriter != null)
          freshSearcher = new IndexSearcher(indexWriter.GetReader());
      else
          freshSearcher = new IndexSearcher(directory, true);

      currentIndexSearcherHolder.SetIndexSearcher(freshSearcher, wait: false);
  }
  /// <summary>
  /// Adds a Lucene document to the index using the analyzer produced by chaining all
  /// registered analyzer generators, starting from <paramref name="analyzer"/>.
  /// </summary>
  /// <param name="currentIndexWriter">The writer to add the document to.</param>
  /// <param name="luceneDoc">The document to index.</param>
  /// <param name="analyzer">The base analyzer; never closed by this method.</param>
  /// <remarks>
  /// When index extensions are registered, a clone of the document is remembered so it can be
  /// passed to them after the write. Reader-valued fields are disposed once the document was added.
  /// </remarks>
  protected void AddDocumentToIndex(RavenIndexWriter currentIndexWriter, Document luceneDoc, Analyzer analyzer)
  {
      Analyzer effectiveAnalyzer = AnalyzerGenerators.Aggregate(analyzer, (previous, generator) =>
      {
          Analyzer next = generator.Value.GenerateAnalyzerForIndexing(name, luceneDoc, previous);

          // An intermediate analyzer that was replaced is no longer needed; the caller's analyzer is left alone.
          var replaced = next != previous;
          if (replaced && previous != analyzer)
              previous.Close();

          return next;
      });

      try
      {
          if (indexExtensions.Count > 0)
              currentlyIndexDocuments.Add(CloneDocument(luceneDoc));

          currentIndexWriter.AddDocument(luceneDoc, effectiveAnalyzer);

          // dispose all the readers
          foreach (var fieldable in luceneDoc.GetFields())
          {
              var reader = fieldable.ReaderValue;
              if (reader != null)
                  reader.Dispose();
          }
      }
      finally
      {
          if (effectiveAnalyzer != analyzer)
              effectiveAnalyzer.Close();
      }
  }
  /// <summary>
  /// Records the current UTC time as the last time this index was queried.
  /// </summary>
  public void MarkQueried()
  {
      MarkQueried(SystemTime.UtcNow);
  }

  /// <summary>
  /// Records the given time as the last time this index was queried.
  /// </summary>
  /// <param name="time">The timestamp to store.</param>
  public void MarkQueried(DateTime time)
  {
      lastQueryTime = time;
  }
  /// <summary>
  /// Looks up a registered index extension by its exact key.
  /// </summary>
  /// <param name="indexExtensionKey">The key the extension was registered under.</param>
  /// <returns>The extension, or null when nothing is registered under that key.</returns>
  public IIndexExtension GetExtension(string indexExtensionKey)
  {
      IIndexExtension extension;
      if (indexExtensions.TryGetValue(indexExtensionKey, out extension))
          return extension;

      return null;
  }
  /// <summary>
  /// Returns the first registered index extension whose key starts with the given prefix.
  /// </summary>
  /// <param name="indexExtensionKeyPrefix">The key prefix to match.</param>
  /// <returns>The first matching extension in enumeration order, or null when none matches.</returns>
  public IIndexExtension GetExtensionByPrefix(string indexExtensionKeyPrefix)
  {
      foreach (var registration in indexExtensions)
      {
          if (registration.Key.StartsWith(indexExtensionKeyPrefix))
              return registration.Value;
      }

      return null;
  }
  /// <summary>
  /// Registers an index extension under the given key. If the key is already taken
  /// the existing registration is kept and the call has no effect.
  /// </summary>
  /// <param name="indexExtensionKey">The key to register under.</param>
  /// <param name="extension">The extension to register.</param>
  public void SetExtension(string indexExtensionKey, IIndexExtension extension)
  {
      // TryAdd never overwrites: the first registration for a key wins.
      indexExtensions.TryAdd(indexExtensionKey, extension);
  }
  /// <summary>
  /// Creates a copy of a Lucene document containing its numeric, binary and string fields.
  /// Fields that have neither a binary nor a string value (probably token streams) are left out.
  /// </summary>
  /// <param name="luceneDoc">The document to copy.</param>
  /// <returns>A new document with copies of the supported fields.</returns>
  private static Document CloneDocument(Document luceneDoc)
  {
      var copy = new Document();

      foreach (AbstractField original in luceneDoc.GetFields())
      {
          var numeric = original as NumericField;
          if (numeric != null)
          {
              copy.Add(CopyNumericField(numeric));
              continue;
          }

          var plainCopy = CopyPlainField(original);
          if (plainCopy != null)
              copy.Add(plainCopy);
      }

      return copy;
  }

  // Copies a numeric field, carrying over its int/long/double/float value when it has one.
  private static NumericField CopyNumericField(NumericField original)
  {
      var store = original.IsStored ? Field.Store.YES : Field.Store.NO;
      var duplicate = new NumericField(original.Name, store, original.IsIndexed);

      var value = original.NumericValue;
      if (value is int)
          duplicate.SetIntValue((int)value);
      else if (value is long)
          duplicate.SetLongValue((long)value);
      else if (value is double)
          duplicate.SetDoubleValue((double)value);
      else if (value is float)
          duplicate.SetFloatValue((float)value);

      return duplicate;
  }

  // Copies a binary or string field; returns null for fields that carry neither kind of value.
  private static Field CopyPlainField(AbstractField original)
  {
      var store = original.IsStored ? Field.Store.YES : Field.Store.NO;

      if (original.IsBinary)
          return new Field(original.Name, original.GetBinaryValue(), store);

      if (original.StringValue == null)
      {
          //probably token stream, and we can't handle fields with token streams, so we skip this.
          return null;
      }

      var index = original.IsIndexed ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NOT_ANALYZED_NO_NORMS;
      var termVector = original.IsTermVectorStored ? Field.TermVector.YES : Field.TermVector.NO;
      return new Field(original.Name, original.StringValue, store, index, termVector);
  }
  /// <summary>
  /// When debug logging is enabled, logs every field of the Lucene document produced for a key:
  /// name, an I/S flag pair for indexed/stored, and the value ("&lt;binary&gt;" for binary fields).
  /// </summary>
  /// <param name="key">The key of the item that was indexed.</param>
  /// <param name="luceneDoc">The resulting Lucene document.</param>
  protected void LogIndexedDocument(string key, Document luceneDoc)
  {
      if (logIndexing.IsDebugEnabled == false)
          return;

      var description = new StringBuilder();
      foreach (var field in luceneDoc.GetFields().Cast<IFieldable>())
      {
          var fieldName = field.Name;
          var fieldValue = field.IsBinary ? "<binary>" : field.StringValue;
          var indexedFlag = field.IsIndexed ? "I" : "-";
          var storedFlag = field.IsStored ? "S" : "-";

          description.Append("\t").Append(fieldName)
              .Append(" ")
              .Append(indexedFlag)
              .Append(storedFlag)
              .Append(": ")
              .Append(fieldValue)
              .AppendLine();
      }

      logIndexing.Debug("Indexing on {0} result in index {1} gave document: {2}", key, name,
          description.ToString());
  }
  /// <summary>
  /// Verifies that every field referenced by the query text and by the sort specification is known
  /// to the view generator. A "_Range" suffix is stripped before the check, and a generator that
  /// contains the catch-all field "_" (dynamic field names) accepts any field.
  /// </summary>
  /// <param name="indexQuery">The query to validate; a blank query text skips validation entirely.</param>
  /// <param name="viewGenerator">The view generator that knows which fields are indexed.</param>
  /// <exception cref="ArgumentException">A queried or sorted field is not indexed.</exception>
  public static void AssertQueryDoesNotContainFieldsThatAreNotIndexed(IndexQuery indexQuery, AbstractViewGenerator viewGenerator)
  {
      if (string.IsNullOrWhiteSpace(indexQuery.Query))
          return;

      HashSet<string> hashSet = SimpleQueryParser.GetFields(indexQuery);
      foreach (string field in hashSet)
      {
          string f = field;
          // Field names are identifiers: match the suffix ordinally, not with culture rules.
          if (f.EndsWith("_Range", StringComparison.Ordinal))
          {
              f = f.Substring(0, f.Length - "_Range".Length);
          }
          if (viewGenerator.ContainsField(f) == false &&
              viewGenerator.ContainsField("_") == false) // the catch all field name means that we have dynamic fields names
              throw new ArgumentException("The field '" + f + "' is not indexed, cannot query on fields that are not indexed");
      }

      if (indexQuery.SortedFields == null)
          return;

      foreach (SortedField field in indexQuery.SortedFields)
      {
          string f = field.Field;
          // Sorting by the temporary score value is always allowed.
          if (f == Constants.TemporaryScoreValue)
              continue;
          if (f.EndsWith("_Range", StringComparison.Ordinal))
          {
              f = f.Substring(0, f.Length - "_Range".Length);
          }
          // Random-order sort fields are always allowed.
          if (f.StartsWith(Constants.RandomFieldName, StringComparison.Ordinal))
              continue;
          if (viewGenerator.ContainsField(f) == false && f != Constants.DistanceFieldName
              && viewGenerator.ContainsField("_") == false)// the catch all field name means that we have dynamic fields names
              throw new ArgumentException("The field '" + f + "' is not indexed, cannot sort on fields that are not indexed");
      }
  }
  761. #region Nested type: IndexQueryOperation
  762. internal class IndexQueryOperation
  763. {
  764. FastVectorHighlighter highlighter;
  765. FieldQuery fieldQuery;
  766. private readonly IndexQuery indexQuery;
  767. private readonly Index parent;
  768. private readonly Func<IndexQueryResult, bool> shouldIncludeInResults;
  769. private readonly HashSet<RavenJObject> alreadyReturned;
  770. private readonly FieldsToFetch fieldsToFetch;
  771. private readonly HashSet<string> documentsAlreadySeenInPreviousPage = new HashSet<string>();
  772. private readonly OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers;
  /// <summary>
  /// Captures everything needed to run one query against the parent index.
  /// For distinct queries a set of already returned projections is allocated.
  /// </summary>
  /// <param name="parent">The index being queried.</param>
  /// <param name="indexQuery">The query to execute.</param>
  /// <param name="shouldIncludeInResults">Filter applied to each candidate result.</param>
  /// <param name="fieldsToFetch">The fields requested by the query.</param>
  /// <param name="indexQueryTriggers">Query triggers registered for the database.</param>
  public IndexQueryOperation(Index parent, IndexQuery indexQuery, Func<IndexQueryResult, bool> shouldIncludeInResults,
      FieldsToFetch fieldsToFetch, OrderedPartCollection<AbstractIndexQueryTrigger> indexQueryTriggers)
  {
      this.parent = parent;
      this.indexQuery = indexQuery;
      this.shouldIncludeInResults = shouldIncludeInResults;
      this.fieldsToFetch = fieldsToFetch;
      this.indexQueryTriggers = indexQueryTriggers;

      if (fieldsToFetch.IsDistinctQuery == false)
          return;

      alreadyReturned = new HashSet<RavenJObject>(new RavenJTokenEqualityComparer());
  }
  /// <summary>
  /// Lazily yields the raw index entries (term documents) matching the query, starting at
  /// <c>indexQuery.Start</c>. Properties whose name ends in "_Range" are removed from each entry.
  /// </summary>
  /// <param name="totalResults">Receives the total number of hits once enumeration starts.</param>
  /// <returns>A clone of the term document of each matching Lucene document.</returns>
  public IEnumerable<RavenJObject> IndexEntries(Reference<int> totalResults)
  {
      parent.MarkQueried();
      using (IndexStorage.EnsureInvariantCulture())
      {
          AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);

          IndexSearcher searcher;
          RavenJObject[] termsDocs;
          using (parent.GetSearcherAndTermsDocs(out searcher, out termsDocs))
          {
              var luceneQuery = GetLuceneQuery();

              TopDocs hits = ExecuteQuery(searcher, luceneQuery, indexQuery.Start, indexQuery.PageSize, indexQuery);
              totalResults.Value = hits.TotalHits;

              for (int position = indexQuery.Start; position < hits.ScoreDocs.Length; position++)
              {
                  var hit = hits.ScoreDocs[position];
                  var entry = (RavenJObject)termsDocs[hit.Doc].CloneToken();

                  // Snapshot the matching keys first: the entry is modified while removing.
                  var rangeKeys = entry.Where(x => x.Key.EndsWith("_Range")).Select(x => x.Key).ToArray();
                  foreach (var rangeKey in rangeKeys)
                  {
                      entry.Remove(rangeKey);
                  }

                  yield return entry;
              }
          }
      }
  }
  /// <summary>
  /// Lazily executes the query and yields at most indexQuery.PageSize results that pass
  /// ShouldIncludeInResults. Hits that are filtered out are counted in indexQuery.SkippedResults,
  /// and the index is re-queried with a larger window until the page is full or every hit was read.
  /// </summary>
  /// <param name="token">Checked before every execution of the Lucene query.</param>
  /// <returns>The results of the requested page, in the order Lucene returned them.</returns>
  810. public IEnumerable<IndexQueryResult> Query(CancellationToken token)
  811. {
  812. parent.MarkQueried();
  813. using (IndexStorage.EnsureInvariantCulture())
  814. {
  815. AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
  816. IndexSearcher indexSearcher;
  // The searcher is held for as long as the caller keeps enumerating.
  817. using (parent.GetSearcher(out indexSearcher))
  818. {
  819. var luceneQuery = GetLuceneQuery();
  820. int start = indexQuery.Start;
  821. int pageSize = indexQuery.PageSize;
  822. int returnedResults = 0;
  823. int skippedResultsInCurrentLoop = 0;
  824. bool readAll;
  825. bool adjustStart = true;
  // Receives the "already seen" / "already returned" sets; the last argument is true for
  // map/reduce indexes and projections.
  826. var recorder = new DuplicateDocumentRecorder(indexSearcher,
  827. parent,
  828. documentsAlreadySeenInPreviousPage,
  829. alreadyReturned,
  830. fieldsToFetch,
  831. parent.IsMapReduce || fieldsToFetch.IsProjection);
  // Outer loop: one pass per window read from the index; repeats while the page is not
  // full and Lucene still has hits beyond the ones already fetched.
  832. do
  833. {
  834. if (skippedResultsInCurrentLoop > 0)
  835. {
  836. start = start + pageSize - (start - indexQuery.Start); // need to "undo" the index adjustment
  837. // trying to guesstimate how many results we will need to read from the index
  838. // to get enough unique documents to match the page size
  839. pageSize = Math.Max(2, skippedResultsInCurrentLoop) * pageSize;
  840. skippedResultsInCurrentLoop = 0;
  841. }
  842. TopDocs search;
  843. int moreRequired;
  // Inner loop: re-run the search, growing pageSize by twice the reported shortfall,
  // until the recorder no longer asks for more results.
  844. do
  845. {
  846. token.ThrowIfCancellationRequested();
  847. search = ExecuteQuery(indexSearcher, luceneQuery, start, pageSize, indexQuery);
  848. moreRequired = recorder.RecordResultsAlreadySeenForDistinctQuery(search, adjustStart, pageSize, ref start);
  849. pageSize += moreRequired * 2;
  850. } while (moreRequired > 0);
  851. indexQuery.TotalSize.Value = search.TotalHits;
  // adjustStart is passed as true only for the first window.
  852. adjustStart = false;
  853. SetupHighlighter(luceneQuery);
  854. for (var i = start; (i - start) < pageSize && i < search.ScoreDocs.Length; i++)
  855. {
  856. var scoreDoc = search.ScoreDocs[i];
  857. var document = indexSearcher.Doc(scoreDoc.Doc);
  858. var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc);
  // Filtered-out hits are counted both globally and for this pass; the per-pass count
  // drives the window growth at the top of the outer loop.
  859. if (ShouldIncludeInResults(indexQueryResult) == false)
  860. {
  861. indexQuery.SkippedResults.Value++;
  862. skippedResultsInCurrentLoop++;
  863. continue;
  864. }
  865. AddHighlighterResults(indexSearcher, scoreDoc, indexQueryResult);
  866. returnedResults++;
  867. yield return indexQueryResult;
  // Stop as soon as the originally requested page size has been delivered.
  868. if (returnedResults == indexQuery.PageSize)
  869. yield break;
  870. }
  // True when the fetched window already contains every hit of the query.
  871. readAll = search.TotalHits == search.ScoreDocs.Length;
  872. } while (returnedResults < indexQuery.PageSize && readAll == false);
  873. }
  874. }
  875. }
  876. private void AddHighlighterResults(IndexSearcher indexSearcher, ScoreDoc scoreDoc, IndexQueryResult indexQueryResult)
  877. {
  878. if (highlighter == null)
  879. return;
  880. var highlightings =
  881. from highlightedField in this.indexQuery.HighlightedFields
  882. select new
  883. {
  884. highlightedField.Field,
  885. highlightedField.FragmentsField,
  886. Fragments = highlighter.GetBestFragments(
  887. fieldQuery,
  888. indexSearcher.IndexReader,
  889. scoreDoc.Doc,
  890. highlightedField.Field,
  891. highlightedField.FragmentLength,
  892. highlightedField.FragmentCount)
  893. }
  894. into fieldHighlitings
  895. where fieldHighlitings.Fragments != null &&
  896. fieldHighlitings.Fragments.Length > 0
  897. select fieldHighlitings;
  898. if (fieldsToFetch.IsProjection || parent.IsMapReduce)
  899. {
  900. foreach (var highlighting in highlightings)
  901. {
  902. if (!string.IsNullOrEmpty(highlighting.FragmentsField))
  903. {
  904. indexQueryResult.Projection[highlighting.FragmentsField] = new RavenJArray(highlighting.Fragments);
  905. }
  906. }
  907. }
  908. else
  909. {
  910. indexQueryResult.Highligtings = highlightings.ToDictionary(x => x.Field, x => x.Fragments);
  911. }
  912. }
  913. private void SetupHighlighter(Query luceneQuery)
  914. {
  915. if (indexQuery.HighlightedFields != null && indexQuery.HighlightedFields.Length > 0)
  916. {
  917. highlighter = new FastVectorHighlighter(
  918. FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
  919. FastVectorHighlighter.DEFAULT_FIELD_MATCH,
  920. new SimpleFragListBuilder(),
  921. new SimpleFragmentsBuilder(
  922. indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
  923. ? indexQuery.HighlighterPreTags
  924. : BaseFragmentsBuilder.COLORED_PRE_TAGS,
  925. indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
  926. ? indexQuery.HighlighterPostTags
  927. : BaseFragmentsBuilder.COLORED_POST_TAGS));
  928. fieldQuery = highlighter.GetFieldQuery(luceneQuery);
  929. }
  930. }
  931. private Query ApplyIndexTriggers(Query luceneQuery)
  932. {
  933. luceneQuery = indexQueryTriggers.Aggregate(luceneQuery,
  934. (current, indexQueryTrigger) =>
  935. indexQueryTrigger.Value.ProcessQuery(parent.name, current, indexQuery));
  936. return luceneQuery;
  937. }
  938. public IEnumerable<IndexQueryResult> IntersectionQuery(CancellationToken token)
  939. {
  940. using (IndexStorage.EnsureInvariantCulture())
  941. {
  942. AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, parent.viewGenerator);
  943. IndexSearcher indexSearcher;
  944. using (parent.GetSearcher(out indexSearcher))
  945. {
  946. var subQueries = indexQuery.Query.Split(new[] { Constants.IntersectSeparator }, StringSplitOptions.RemoveEmptyEntries);
  947. if (subQueries.Length <= 1)
  948. throw new InvalidOperationException("Invalid INTERSECT query, must have multiple intersect clauses.");
  949. //Not sure how to select the page size here??? The problem is that only docs in this search can be part
  950. //of the final result because we're doing an intersection query (but we might exclude some of them)
  951. int pageSizeBestGuess = (indexQuery.Start + indexQuery.PageSize) * 2;
  952. int intersectMatches = 0, skippedResultsInCurrentLoop = 0;
  953. int previousBaseQueryMatches = 0, currentBaseQueryMatches = 0;
  954. var firstSubLuceneQuery = GetLuceneQuery(subQueries[0], indexQuery);
  955. //Do the first sub-query in the normal way, so that sorting, filtering etc is accounted for
  956. var search = ExecuteQuery(indexSearcher, firstSubLuceneQuery, 0, pageSizeBestGuess, indexQuery);
  957. currentBaseQueryMatches = search.ScoreDocs.Length;
  958. var intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
  959. do
  960. {
  961. token.ThrowIfCancellationRequested();
  962. if (skippedResultsInCurrentLoop > 0)
  963. {
  964. // We get here because out first attempt didn't get enough docs (after INTERSECTION was calculated)
  965. pageSizeBestGuess = pageSizeBestGuess * 2;
  966. search = ExecuteQuery(indexSearcher, firstSubLuceneQuery, 0, pageSizeBestGuess, indexQuery);
  967. previousBaseQueryMatches = currentBaseQueryMatches;
  968. currentBaseQueryMatches = search.ScoreDocs.Length;
  969. intersectionCollector = new IntersectionCollector(indexSearcher, search.ScoreDocs);
  970. }
  971. for (int i = 1; i < subQueries.Length; i++)
  972. {
  973. var luceneSubQuery = GetLuceneQuery(subQueries[i], indexQuery);
  974. indexSearcher.Search(luceneSubQuery, null, intersectionCollector);
  975. }
  976. var currentIntersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
  977. intersectMatches = currentIntersectResults.Count;
  978. skippedResultsInCurrentLoop = pageSizeBestGuess - intersectMatches;
  979. } while (intersectMatches < indexQuery.PageSize && //stop if we've got enough results to satisfy the pageSize
  980. currentBaseQueryMatches < search.TotalHits && //stop if increasing the page size wouldn't make any difference
  981. previousBaseQueryMatches < currentBaseQueryMatches); //stop if increasing the page size didn't result in any more "base query" results
  982. var intersectResults = intersectionCollector.DocumentsIdsForCount(subQueries.Length).ToList();
  983. //It's hard to know what to do here, the TotalHits from the base search isn't really the TotalSize,
  984. //because it's before the INTERSECTION has been applied, so only some of those results make it out.
  985. //Trying to give an accurate answer is going to be too costly, so we aren't going to try.
  986. indexQuery.TotalSize.Value = search.TotalHits;
  987. indexQuery.SkippedResults.Value = skippedResultsInCurrentLoop;
  988. //Using the final set of results in the intersectionCollector
  989. int returnedResults = 0;
  990. for (int i = indexQuery.Start; i < intersectResults.Count && (i - indexQuery.Start) < pageSizeBestGuess; i++)
  991. {
  992. Document document = indexSearcher.Doc(intersectResults[i].LuceneId);
  993. IndexQueryResult indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
  994. if (ShouldIncludeInResults(indexQueryResult) == false)
  995. {
  996. indexQuery.SkippedResults.Value++;
  997. skippedResultsInCurrentLoop++;
  998. continue;
  999. }
  1000. returnedResults++;
  1001. yield return indexQueryResult;
  1002. if (returnedResults == indexQuery.PageSize)
  1003. yield break;
  1004. }
  1005. }
  1006. }
  1007. }
  1008. private bool ShouldIncludeInResults(IndexQueryResult indexQueryResult)
  1009. {
  1010. if (shouldIncludeInResults(indexQueryResult) == false)
  1011. return false;
  1012. if (documentsAlreadySeenInPreviousPage.Contains(indexQueryResult.Key))
  1013. return false;
  1014. if (fieldsToFetch.IsDistinctQuery && alreadyReturned.Add(indexQueryResult.Projection) == false)
  1015. return false;
  1016. return true;
  1017. }
  1018. private void RecordResultsAlreadySeenForDistinctQuery(IndexSearcher indexSearcher, TopDocs search, int start, int pageSize)
  1019. {
  1020. var min = Math.Min(start, search.TotalHits);
  1021. // we are paging, we need to check that we don't have duplicates in the previous page
  1022. // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
  1023. if (parent.IsMapReduce == false && fieldsToFetch.IsProjection == false && start - pageSize >= 0 && start < search.TotalHits)
  1024. {
  1025. for (int i = start - pageSize; i < min; i++)
  1026. {
  1027. var document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
  1028. documentsAlreadySeenInPreviousPage.Add(document.Get(Constants.DocumentIdFieldName));
  1029. }
  1030. }
  1031. if (fieldsToFetch.IsDistinctQuery == false)
  1032. return;
  1033. // add results that were already there in previous pages
  1034. for (int i = 0; i < min; i++)
  1035. {
  1036. Document document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
  1037. var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
  1038. alreadyReturned.Add(indexQueryResult.Projection);
  1039. }
  1040. }
  1041. public Query GetLuceneQuery()
  1042. {
  1043. var q = GetLuceneQuery(indexQuery.Query, indexQuery);
  1044. var spatialIndexQuery = indexQuery as SpatialIndexQuery;
  1045. if (spatialIndexQuery != null)
  1046. {
  1047. var spatialField = parent.viewGenerator.GetSpatialField(spatialIndexQuery.SpatialFieldName);
  1048. var dq = spatialField.MakeQuery(q, spatialField.GetStrategy(), spatialIndexQuery);
  1049. if (q is MatchAllDocsQuery) return dq;
  1050. var bq = new BooleanQuery { { q, Occur.MUST }, { dq, Occur.MUST } };
  1051. return bq;
  1052. }
  1053. return q;
  1054. }
  1055. private Query GetLuceneQuery(string query, IndexQuery indexQuery)
  1056. {
  1057. Query luceneQuery;
  1058. if (String.IsNullOrEmpty(query))
  1059. {
  1060. logQuerying.Debug("Issuing query on index {0} for all documents", parent.name);
  1061. luceneQuery = new MatchAllDocsQuery();
  1062. }
  1063. else
  1064. {
  1065. logQuerying.Debug("Issuing query on index {0} for: {1}", parent.name, query);
  1066. var toDispose = new List<Action>();
  1067. RavenPerFieldAnalyzerWrapper searchAnalyzer = null;
  1068. try
  1069. {
  1070. searchAnalyzer = parent.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
  1071. searchAnalyzer = parent.AnalyzerGenerators.Aggregate(searchAnalyzer, (currentAnalyzer, generator) =>
  1072. {
  1073. Analyzer newAnalyzer = generator.GenerateAnalyzerForQuerying(parent.name, indexQuery.Query, currentAnalyzer);
  1074. if (newAnalyzer != currentAnalyzer)
  1075. {
  1076. DisposeAnalyzerAndFriends(toDispose, currentAnalyzer);
  1077. }
  1078. return parent.CreateAnalyzer(newAnalyzer, toDispose, true);
  1079. });
  1080. luceneQuery = QueryBuilder.BuildQuery(query, indexQuery, searchAnalyzer);
  1081. }
  1082. finally
  1083. {
  1084. DisposeAnalyzerAndFriends(toDispose, searchAnalyzer);
  1085. }
  1086. }
  1087. return ApplyIndexTriggers(luceneQuery);
  1088. }
  1089. private static void DisposeAnalyzerAndFriends(List<Action> toDispose, RavenPerFieldAnalyzerWrapper analyzer)
  1090. {
  1091. if (analyzer != null)
  1092. analyzer.Close();
  1093. foreach (Action dispose in toDispose)
  1094. {
  1095. dispose();
  1096. }
  1097. toDispose.Clear();
  1098. }
  1099. private TopDocs ExecuteQuery(IndexSearcher indexSearcher, Query luceneQuery, int start, int pageSize,
  1100. IndexQuery indexQuery)
  1101. {
  1102. var sort = indexQuery.GetSort(parent.indexDefinition, parent.viewGenerator);
  1103. if (pageSize == Int32.MaxValue && sort == null) // we want all docs, no sorting required
  1104. {
  1105. var gatherAllCollector = new GatherAllCollector();
  1106. indexSearcher.Search(luceneQuery, gatherAllCollector);
  1107. return gatherAllCollector.ToTopDocs();
  1108. }
  1109. int absFullPage = Math.Abs(pageSize + start); // need to protect against ridicilously high values of pageSize + start that overflow
  1110. var minPageSize = Math.Max(absFullPage, 1);
  1111. // NOTE: We get Start + Pagesize results back so we have something to page on
  1112. if (sort != null)
  1113. {
  1114. try
  1115. {
  1116. //indexSearcher.SetDefaultFieldSortScoring (sort.GetSort().Contains(SortField.FIELD_SCORE), false);
  1117. indexSearcher.SetDefaultFieldSortScoring(true, false);
  1118. var ret = indexSearcher.Search(luceneQuery, null, minPageSize, sort);
  1119. return ret;
  1120. }
  1121. finally
  1122. {
  1123. indexSearcher.SetDefaultFieldSortScoring(false, false);
  1124. }
  1125. }
  1126. return indexSearcher.Search(luceneQuery, null, minPageSize);
  1127. }
  1128. }
  1129. #endregion
  1130. public class DuplicateDocumentRecorder
  1131. {
  1132. private int min = -1;
  1133. private readonly bool isProjectionOrMapReduce;
  1134. private readonly Searchable indexSearcher;
  1135. private readonly Index parent;
  1136. private int alreadyScannedPositions, alreadyScannedPositionsForDistinct;
  1137. private readonly HashSet<string> documentsAlreadySeenInPreviousPage;
  1138. private readonly HashSet<RavenJObject> alreadyReturned;
  1139. private readonly FieldsToFetch fieldsToFetch;
  1140. public DuplicateDocumentRecorder(Searchable indexSearcher,
  1141. Index parent,
  1142. HashSet<string> documentsAlreadySeenInPreviousPage,
  1143. HashSet<RavenJObject> alreadyReturned,
  1144. FieldsToFetch fieldsToFetch,
  1145. bool isProjectionOrMapReduce)
  1146. {
  1147. this.indexSearcher = indexSearcher;
  1148. this.parent = parent;
  1149. this.isProjectionOrMapReduce = isProjectionOrMapReduce;
  1150. this.alreadyReturned = alreadyReturned;
  1151. this.fieldsToFetch = fieldsToFetch;
  1152. this.documentsAlreadySeenInPreviousPage = documentsAlreadySeenInPreviousPage;
  1153. }
  1154. public int RecordResultsAlreadySeenForDistinctQuery(TopDocs search, bool adjustStart, int pageSize, ref int start)
  1155. {
  1156. int itemsSkipped = 0;
  1157. if (min == -1)
  1158. {
  1159. min = start;
  1160. }
  1161. min = Math.Min(min, search.TotalHits);
  1162. // we are paging, we need to check that we don't have duplicates in the previous pages
  1163. // see here for details: http://groups.google.com/group/ravendb/browse_frm/thread/d71c44aa9e2a7c6e
  1164. if (isProjectionOrMapReduce == false)
  1165. {
  1166. for (int i = alreadyScannedPositions; i < min; i++)
  1167. {
  1168. if (i >= search.ScoreDocs.Length)
  1169. {
  1170. alreadyScannedPositions = i;
  1171. var pageSizeIncreaseSize = min - search.ScoreDocs.Length;
  1172. return pageSizeIncreaseSize;
  1173. }
  1174. var document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
  1175. var id = document.Get(Constants.DocumentIdFieldName);
  1176. if (documentsAlreadySeenInPreviousPage.Add(id) == false)
  1177. {
  1178. // already seen this, need to expand the range we are scanning because the user
  1179. // didn't take this into account
  1180. min = Math.Min(min + 1, search.TotalHits);
  1181. itemsSkipped++;
  1182. }
  1183. }
  1184. alreadyScannedPositions = min;
  1185. }
  1186. if (fieldsToFetch.IsDistinctQuery)
  1187. {
  1188. // add results that were already there in previous pages
  1189. for (int i = alreadyScannedPositionsForDistinct; i < min; i++)
  1190. {
  1191. if (i >= search.ScoreDocs.Length)
  1192. {
  1193. alreadyScannedPositionsForDistinct = i;
  1194. var pageSizeIncreaseSize = min - search.ScoreDocs.Length;
  1195. return pageSizeIncreaseSize;
  1196. }
  1197. Document document = indexSearcher.Doc(search.ScoreDocs[i].Doc);
  1198. var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, search.ScoreDocs[i]);
  1199. if (indexQueryResult.Projection.Count > 0 && // we don't consider empty projections to be relevant for distinct operations
  1200. alreadyReturned.Add(indexQueryResult.Projection) == false)
  1201. {
  1202. min++; // we found a duplicate
  1203. itemsSkipped++;
  1204. }
  1205. }
  1206. alreadyScannedPositionsForDistinct = min;
  1207. }
  1208. if (adjustStart)
  1209. start += itemsSkipped;
  1210. return itemsSkipped;
  1211. }
  1212. }
  1213. public IndexingPerformanceStats[] GetIndexingPerformance()
  1214. {
  1215. return currentlyIndexing.Values.Concat(indexingPerformanceStats).ToArray();
  1216. }
  1217. public void Backup(string backupDirectory, string path, string incrementalTag)
  1218. {
  1219. if (directory is RAMDirectory)
  1220. {
  1221. //if the index is memory-only, force writing index data to disk
  1222. Write((writer, analyzer, stats) =>
  1223. {
  1224. ForceWriteToDisk();
  1225. return new IndexedItemsInfo { ChangedDocs = 1 };
  1226. });
  1227. }
  1228. bool hasSnapshot = false;
  1229. bool throwOnFinallyException = true;
  1230. try
  1231. {
  1232. var existingFiles = new HashSet<string>();
  1233. if (incrementalTag != null)
  1234. backupDirectory = Path.Combine(backupDirectory, incrementalTag);
  1235. var allFilesPath = Path.Combine(backupDirectory, MonoHttpUtility.UrlEncode(name) + ".all-existing-index-files");
  1236. var saveToFolder = Path.Combine(backupDirectory, "Indexes", MonoHttpUtility.UrlEncode(name));
  1237. System.IO.Directory.CreateDirectory(saveToFolder);
  1238. if (File.Exists(allFilesPath))
  1239. {
  1240. foreach (var file in File.ReadLines(allFilesPath))
  1241. {
  1242. existingFiles.Add(file);
  1243. }
  1244. }
  1245. var neededFilePath = Path.Combine(saveToFolder, "index-files.required-for-index-restore");
  1246. using (var allFilesWriter = File.Exists(allFilesPath) ? File.AppendText(allFilesPath) : File.CreateText(allFilesPath))
  1247. using (var neededFilesWriter = File.CreateText(neededFilePath))
  1248. {
  1249. try
  1250. {
  1251. // this is called for the side effect of creating the snapshotter and the writer
  1252. // we explicitly handle the backup outside of the write, to allow concurrent indexing
  1253. Write((writer, analyzer, stats) =>
  1254. {
  1255. // however, we copy the current segments.gen & index.version to make
  1256. // sure that we get the _at the time_ of the write.
  1257. foreach (var fileName in new[] { "segments.gen", IndexStorage.IndexVersionFileName(indexDefinition)})
  1258. {
  1259. var fullPath = Path.Combine(path, MonoHttpUtility.UrlEncode(name), fileName);
  1260. File.Copy(fullPath, Path.Combine(saveToFolder, fileName));
  1261. allFilesWriter.WriteLine(fileName);
  1262. neededFilesWriter.WriteLine(fileName);
  1263. }
  1264. return new IndexedItemsInfo();
  1265. });
  1266. }
  1267. catch (CorruptIndexException e)
  1268. {
  1269. logIndexing.WarnException(
  1270. "Could not …

Large files are truncated, but you can click here to view the full file