PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/ToMigrate/Raven.Database/Queries/MoreLikeThisQueryRunner.cs

http://github.com/ayende/ravendb
C# | 259 lines | 209 code | 44 blank | 6 comment | 58 complexity | 38e0079da1055ecbba2cb6221219ad70 MD5 | raw file
Possible License(s): GPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, Apache-2.0, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="MoreLikeThisQueryRunner.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Linq;
  9. using System.Threading;
  10. using Lucene.Net.Documents;
  11. using Lucene.Net.Index;
  12. using Lucene.Net.Search;
  13. using Raven.Abstractions.Data;
  14. using Raven.Abstractions.Extensions;
  15. using Raven.Abstractions.Linq;
  16. using Raven.Abstractions.Util.Encryptors;
  17. using Raven.Database.Bundles.MoreLikeThis;
  18. using Raven.Database.Data;
  19. using Raven.Database.Impl;
  20. using Raven.Database.Indexing;
  21. using Raven.Database.Linq;
  22. using Raven.Json.Linq;
  23. using Index = Raven.Database.Indexing.Index;
  24. using Sparrow;
  25. namespace Raven.Database.Queries
  26. {
  /// <summary>
  /// Executes "more like this" queries: locates a base document in a Lucene index,
  /// builds a similarity query from it and returns the matching documents.
  /// </summary>
  public class MoreLikeThisQueryRunner
  {
      private readonly DocumentDatabase database;

      /// <summary>
      /// Creates a runner bound to the given database.
      /// </summary>
      /// <param name="database">Database whose index storage, documents and transformers are queried.</param>
      public MoreLikeThisQueryRunner(DocumentDatabase database)
      {
          this.database = database;
      }

      /// <summary>
      /// Runs a "more like this" query and returns the similar documents together with their includes.
      /// </summary>
      /// <param name="query">
      /// The query definition. Either <c>DocumentId</c> or at least one entry in <c>MapGroupFields</c>
      /// must be supplied to identify the base document.
      /// </param>
      /// <param name="pageSize">
      /// Number of hits collected from the index. The base document itself is removed from the
      /// collected hits afterwards, so fewer results than <paramref name="pageSize"/> may be returned.
      /// </param>
      /// <returns>The results, their includes and an etag computed from all contributing etags.</returns>
      /// <exception cref="ArgumentNullException"><paramref name="query"/> is null.</exception>
      /// <exception cref="InvalidOperationException">
      /// The index, the base document, the stop words document or the results transformer cannot be
      /// found, or neither a document id nor map group fields were supplied.
      /// </exception>
      public MoreLikeThisQueryResult ExecuteMoreLikeThisQuery(MoreLikeThisQuery query, int pageSize = 25)
      {
          if (query == null) throw new ArgumentNullException("query");

          var index = database.IndexStorage.GetIndexInstance(query.IndexName);
          if (index == null)
              throw new InvalidOperationException("The index " + query.IndexName + " cannot be found");

          var hasDocumentId = string.IsNullOrEmpty(query.DocumentId) == false;
          // A null MapGroupFields is treated the same as an empty one instead of failing with a NullReferenceException.
          var hasMapGroupFields = query.MapGroupFields != null && query.MapGroupFields.Count > 0;
          if (hasDocumentId == false && hasMapGroupFields == false)
              throw new InvalidOperationException("The document id or map group fields are mandatory");

          IndexSearcher searcher;
          using (database.IndexStorage.GetCurrentIndexSearcher(index.indexId, out searcher))
          {
              var documentQuery = BuildBaseDocumentQuery(query, hasDocumentId, hasMapGroupFields);

              // get the current Lucene docid for the given RavenDB doc ID
              var td = searcher.Search(documentQuery, 1);
              if (td.ScoreDocs.Length == 0)
              {
                  throw new InvalidOperationException(hasDocumentId
                      ? "Document " + query.DocumentId + " could not be found"
                      : "No document matching the specified map group fields could be found");
              }
              var baseDocId = td.ScoreDocs[0].Doc;

              var ir = searcher.IndexReader;
              var mlt = new RavenMoreLikeThis(ir);
              AssignParameters(mlt, query);
              ApplyStopWords(mlt, query);

              var fieldNames = query.Fields ?? GetFieldNames(ir);
              mlt.SetFieldNames(fieldNames);

              var toDispose = new List<Action>();
              RavenPerFieldAnalyzerWrapper perFieldAnalyzerWrapper = null;
              try
              {
                  perFieldAnalyzerWrapper = index.CreateAnalyzer(new LowerCaseKeywordAnalyzer(), toDispose, true);
                  mlt.Analyzer = perFieldAnalyzerWrapper;

                  var mltQuery = mlt.Like(baseDocId);
                  var tsdc = TopScoreDocCollector.Create(pageSize, true);

                  if (string.IsNullOrWhiteSpace(query.AdditionalQuery) == false)
                  {
                      // Both the similarity query and the caller supplied query must match.
                      var additionalQuery = QueryBuilder.BuildQuery(query.AdditionalQuery, perFieldAnalyzerWrapper);
                      mltQuery = new BooleanQuery
                      {
                          {mltQuery, Occur.MUST},
                          {additionalQuery, Occur.MUST},
                      };
                  }

                  searcher.Search(mltQuery, tsdc);
                  var hits = tsdc.TopDocs().ScoreDocs;
                  var jsonDocuments = GetJsonDocuments(query, searcher, index, query.IndexName, hits, baseDocId);

                  var result = new MultiLoadResult();

                  // The result etag is a hash over the etags of every returned document,
                  // the index etag and the etags of all included documents.
                  var includedEtags = new List<byte>(jsonDocuments.SelectMany(x => x.Etag.ToByteArray()));
                  includedEtags.AddRange(database.Indexes.GetIndexEtag(query.IndexName, null).ToByteArray());

                  var loadedIds = new HashSet<string>(jsonDocuments.Select(x => x.Key));
                  var addIncludesCommand = new AddIncludesCommand(database, (etag, includedDoc) =>
                  {
                      includedEtags.AddRange(etag.ToByteArray());
                      result.Includes.Add(includedDoc);
                  }, query.Includes ?? new string[0], loadedIds);

                  // Kept local (not in instance fields) so that overlapping calls on the same
                  // runner cannot overwrite each other's state.
                  var idsToLoad = new HashSet<string>();

                  database.TransactionalStorage.Batch(actions =>
                  {
                      var documentRetriever = new DocumentRetriever(database.Configuration, actions, database.ReadTriggers, query.TransformerParameters, idsToLoad);

                      using (new CurrentTransformationScope(database, documentRetriever))
                      {
                          foreach (var document in ProcessResults(query, jsonDocuments, database.WorkContext.CancellationToken))
                          {
                              result.Results.Add(document);
                              addIncludesCommand.Execute(document);
                          }
                      }
                  });

                  addIncludesCommand.AlsoInclude(idsToLoad);

                  Etag computedEtag = Etag.FromHash(Hashing.Metro128.Calculate(includedEtags.ToArray()));

                  return new MoreLikeThisQueryResult
                  {
                      Etag = computedEtag,
                      Result = result,
                  };
              }
              finally
              {
                  try
                  {
                      if (perFieldAnalyzerWrapper != null)
                          perFieldAnalyzerWrapper.Close();
                  }
                  finally
                  {
                      // Run the registered cleanup actions even when closing the analyzer fails.
                      foreach (var action in toDispose)
                      {
                          action();
                      }
                  }
              }
          }
      }

      /// <summary>
      /// Builds the query that locates the base document: every supplied criterion
      /// (lower-cased document id, each map group field) must match.
      /// </summary>
      private static BooleanQuery BuildBaseDocumentQuery(MoreLikeThisQuery query, bool hasDocumentId, bool hasMapGroupFields)
      {
          var documentQuery = new BooleanQuery();

          if (hasDocumentId)
          {
              documentQuery.Add(new TermQuery(new Term(Constants.DocumentIdFieldName, query.DocumentId.ToLowerInvariant())), Occur.MUST);
          }

          if (hasMapGroupFields)
          {
              foreach (string key in query.MapGroupFields.Keys)
              {
                  documentQuery.Add(new TermQuery(new Term(key, query.MapGroupFields[key])), Occur.MUST);
              }
          }

          return documentQuery;
      }

      /// <summary>
      /// Loads the stop words document named by the query, if any, and hands its
      /// words (compared case-insensitively) to <paramref name="mlt"/>.
      /// </summary>
      /// <exception cref="InvalidOperationException">The stop words document does not exist.</exception>
      private void ApplyStopWords(RavenMoreLikeThis mlt, MoreLikeThisQuery query)
      {
          if (string.IsNullOrWhiteSpace(query.StopWordsDocumentId))
              return;

          var stopWordsDoc = database.Documents.Get(query.StopWordsDocumentId);
          if (stopWordsDoc == null)
              throw new InvalidOperationException("Stop words document " + query.StopWordsDocumentId + " could not be found");

          var stopWordsSetup = stopWordsDoc.DataAsJson.JsonDeserialization<StopWordsSetup>();
          if (stopWordsSetup.StopWords == null)
              return;

          mlt.SetStopWords(new HashSet<string>(stopWordsSetup.StopWords, StringComparer.InvariantCultureIgnoreCase));
      }

      /// <summary>
      /// Converts the documents to JSON, running them through the query's results
      /// transformer when one is specified. The returned sequence is evaluated lazily.
      /// </summary>
      /// <exception cref="InvalidOperationException">The named results transformer does not exist.</exception>
      private IEnumerable<RavenJObject> ProcessResults(MoreLikeThisQuery query, IEnumerable<JsonDocument> documents, CancellationToken token)
      {
          if (string.IsNullOrEmpty(query.ResultsTransformer))
              return documents.Select(x => x.ToJson());

          var transformGenerator = database.IndexDefinitionStorage.GetTransformer(query.ResultsTransformer);
          if (transformGenerator == null || transformGenerator.TransformResultsDefinition == null)
              throw new InvalidOperationException("The transformer " + query.ResultsTransformer + " was not found");

          IndexingFunc transformFunc = transformGenerator.TransformResultsDefinition;

          // NOTE(review): the collected errors are never read in this class, so transformer
          // failures are currently dropped silently - confirm whether they should be surfaced.
          var transformerErrors = new List<string>();
          var robustEnumerator = new RobustEnumerator(token, 100,
              onError: (exception, o) => transformerErrors.Add(string.Format("Doc '{0}', Error: {1}", Index.TryGetDocKey(o), exception.Message)));

          return robustEnumerator
              .RobustEnumeration(documents.Select(x => new DynamicJsonObject(x.ToJson())).GetEnumerator(), transformFunc)
              .Select(JsonExtensions.ToJObject);
      }

      /// <summary>
      /// Turns the Lucene hits (minus the base document) into JSON documents.
      /// When the query names a document id the documents are loaded from the database by the
      /// id stored in the index; otherwise they are projected from the index entry fields and
      /// stamped with the index etag.
      /// </summary>
      private JsonDocument[] GetJsonDocuments(MoreLikeThisQuery parameters, IndexSearcher searcher, Index index, string indexName, IEnumerable<ScoreDoc> hits, int baseDocId)
      {
          var otherHits = hits.Where(hit => hit.Doc != baseDocId);

          if (string.IsNullOrEmpty(parameters.DocumentId) == false)
          {
              var documentIds = otherHits
                  .Select(hit => searcher.Doc(hit.Doc).Get(Constants.DocumentIdFieldName))
                  .Where(x => x != null)
                  .Distinct();

              // Ids whose document can no longer be loaded are skipped.
              return documentIds
                  .Select(docId => database.Documents.Get(docId))
                  .Where(it => it != null)
                  .ToArray();
          }

          // Project the same set of fields that the base index entry carries.
          var fields = searcher.Doc(baseDocId).GetFields().Cast<AbstractField>().Select(x => x.Name).Distinct().ToArray();
          var etag = database.Indexes.GetIndexEtag(indexName, null);
          var keyFieldName = index.IsMapReduce ? Constants.ReduceKeyFieldName : Constants.DocumentIdFieldName;

          return otherHits
              .Select(hit => new JsonDocument
              {
                  DataAsJson = Index.CreateDocumentFromFields(searcher.Doc(hit.Doc), new FieldsToFetch(fields, false, keyFieldName)),
                  Etag = etag
              })
              .ToArray();
      }

      /// <summary>
      /// Copies every tuning parameter that is set on the query onto the Lucene
      /// <c>MoreLikeThis</c> instance; parameters left null are not touched.
      /// </summary>
      private static void AssignParameters(Lucene.Net.Search.Similar.MoreLikeThis mlt, MoreLikeThisQuery parameters)
      {
          if (parameters.Boost != null) mlt.Boost = parameters.Boost.Value;
          if (parameters.BoostFactor != null) mlt.BoostFactor = parameters.BoostFactor.Value;
          if (parameters.MaximumNumberOfTokensParsed != null) mlt.MaxNumTokensParsed = parameters.MaximumNumberOfTokensParsed.Value;
          if (parameters.MaximumQueryTerms != null) mlt.MaxQueryTerms = parameters.MaximumQueryTerms.Value;
          if (parameters.MinimumWordLength != null) mlt.MinWordLen = parameters.MinimumWordLength.Value;
          if (parameters.MaximumWordLength != null) mlt.MaxWordLen = parameters.MaximumWordLength.Value;
          if (parameters.MinimumTermFrequency != null) mlt.MinTermFreq = parameters.MinimumTermFrequency.Value;
          if (parameters.MinimumDocumentFrequency != null) mlt.MinDocFreq = parameters.MinimumDocumentFrequency.Value;
          if (parameters.MaximumDocumentFrequency != null) mlt.MaxDocFreq = parameters.MaximumDocumentFrequency.Value;
          if (parameters.MaximumDocumentFrequencyPercentage != null) mlt.SetMaxDocFreqPct(parameters.MaximumDocumentFrequencyPercentage.Value);
      }

      /// <summary>
      /// Returns the indexed field names of the reader, excluding the document id
      /// and reduce key fields.
      /// </summary>
      private static string[] GetFieldNames(IndexReader indexReader)
      {
          var fields = indexReader.GetFieldNames(IndexReader.FieldOption.INDEXED);
          return fields
              .Where(x => x != Constants.DocumentIdFieldName && x != Constants.ReduceKeyFieldName)
              .ToArray();
      }
  }
  215. }