PageRenderTime 43ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Queries/FacetedQueryRunner.cs

https://github.com/nwendel/ravendb
C# | 631 lines | 551 code | 77 blank | 3 comment | 105 complexity | f7594b3c9fcffd81f11d5571aa15d06d MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.Globalization;
  5. using System.Linq;
  6. using Lucene.Net.Documents;
  7. using Lucene.Net.Index;
  8. using Lucene.Net.Search;
  9. using Lucene.Net.Util;
  10. using Raven.Abstractions.Data;
  11. using Raven.Abstractions.Extensions;
  12. using Raven.Abstractions.Indexing;
  13. using Raven.Database.Indexing;
  14. using Raven.Database.Linq;
  15. namespace Raven.Database.Queries
  16. {
  17. using Raven.Abstractions;
  18. using Raven.Abstractions.Util;
  19. public class FacetedQueryRunner
  20. {
  /// <summary>Database whose index storage and index definitions are used to compute facets.</summary>
  private readonly DocumentDatabase database;

  /// <summary>
  /// Creates a runner bound to the given database.
  /// </summary>
  /// <param name="database">Database that facet queries will be executed against.</param>
  /// <exception cref="ArgumentNullException"><paramref name="database"/> is null.</exception>
  public FacetedQueryRunner(DocumentDatabase database)
  {
      // Fail here rather than with a NullReferenceException on the first GetFacets call.
      if (database == null)
          throw new ArgumentNullException("database");

      this.database = database;
  }
  /// <summary>
  /// Computes the facet results of <paramref name="indexQuery"/> over <paramref name="index"/>.
  /// </summary>
  /// <param name="index">Name of the index to query.</param>
  /// <param name="indexQuery">Base query whose matching documents are faceted.</param>
  /// <param name="facets">Facet definitions. A facet with a numerical AggregationType gets the
  /// "_Range" suffix appended to its AggregationField in place.</param>
  /// <param name="start">Number of leading terms to skip for default-mode facets.</param>
  /// <param name="pageSize">Optional cap on the number of terms returned per default-mode facet.</param>
  /// <returns>Facet results keyed by display name (or facet name when no display name is set).</returns>
  /// <exception cref="ArgumentNullException"><paramref name="facets"/> is null.</exception>
  /// <exception cref="InvalidOperationException">A facet requests an aggregation other than Count/None without an AggregationField.</exception>
  /// <exception cref="ArgumentException">A facet has an unknown mode, or a range cannot be parsed.</exception>
  public FacetResults GetFacets(string index, IndexQuery indexQuery, List<Facet> facets, int start = 0, int? pageSize = null)
  {
      if (facets == null)
          throw new ArgumentNullException("facets");

      var sp = Stopwatch.StartNew();
      var results = new FacetResults();
      var defaultFacets = new Dictionary<string, Facet>();
      var rangeFacets = new Dictionary<string, List<ParsedRange>>();

      var viewGenerator = database.IndexDefinitionStorage.GetViewGenerator(index);
      Index.AssertQueryDoesNotContainFieldsThatAreNotIndexed(indexQuery, viewGenerator);

      foreach (var facet in facets)
      {
          // Results are keyed by display name, falling back to the field name.
          var key = string.IsNullOrWhiteSpace(facet.DisplayName) ? facet.Name : facet.DisplayName;

          // Every facet (default and range mode alike) is registered here.
          defaultFacets[key] = facet;

          if (facet.Aggregation != FacetAggregation.Count && facet.Aggregation != FacetAggregation.None)
          {
              if (string.IsNullOrEmpty(facet.AggregationField))
                  throw new InvalidOperationException("Facet " + facet.Name + " cannot have aggregation set to " +
                                                      facet.Aggregation + " without having a value in AggregationField");

              // Field names are identifiers, so the suffix check must be ordinal, not culture-sensitive.
              if (facet.AggregationField.EndsWith("_Range", StringComparison.Ordinal) == false &&
                  QueryForFacets.IsAggregationTypeNumerical(facet.AggregationType))
              {
                  facet.AggregationField = facet.AggregationField + "_Range";
              }
          }

          switch (facet.Mode)
          {
              case FacetMode.Default:
                  results.Results[key] = new FacetResult();
                  break;
              case FacetMode.Ranges:
                  rangeFacets[key] = facet.Ranges.Select(range => ParseRange(facet.Name, range)).ToList();
                  // One FacetValue per declared range, in the same order as the parsed ranges.
                  results.Results[key] = new FacetResult
                  {
                      Values = facet.Ranges.Select(range => new FacetValue
                      {
                          Range = range,
                      }).ToList()
                  };
                  break;
              default:
                  throw new ArgumentException(string.Format("Could not understand '{0}'", facet.Mode));
          }
      }

      var queryForFacets = new QueryForFacets(database, index, defaultFacets, rangeFacets, indexQuery, results, start, pageSize);
      queryForFacets.Execute();

      results.Duration = sp.Elapsed;
      return results;
  }
  /// <summary>
  /// Parses a textual range of the form "&lt;bracket&gt;low TO high&lt;bracket&gt;" into a <see cref="ParsedRange"/>.
  /// '[' / ']' mark an inclusive bound, '{' / '}' an exclusive one; "NULL" or "*" marks an open bound.
  /// </summary>
  /// <param name="field">Field the range applies to.</param>
  /// <param name="range">Range text as supplied on the facet.</param>
  /// <exception cref="ArgumentNullException"><paramref name="range"/> is null.</exception>
  /// <exception cref="ArgumentException">The text does not have two non-empty bounds or uses an unknown bracket.</exception>
  private static ParsedRange ParseRange(string field, string range)
  {
      if (range == null)
          throw new ArgumentNullException("range");

      var parts = range.Split(new[] { " TO " }, 2, StringSplitOptions.RemoveEmptyEntries);
      if (parts.Length != 2)
          throw new ArgumentException("Could not understand range query: " + range);

      var trimmedLow = parts[0].Trim();
      var trimmedHigh = parts[1].Trim();

      // A whitespace-only bound survives RemoveEmptyEntries but has no bracket to read;
      // report it like any other malformed range instead of failing inside First()/Last().
      if (trimmedLow.Length == 0 || trimmedHigh.Length == 0)
          throw new ArgumentException("Could not understand range query: " + range);

      var parsedRange = new ParsedRange
      {
          Field = field,
          RangeText = range,
          LowInclusive = IsInclusive(trimmedLow[0]),
          HighInclusive = IsInclusive(trimmedHigh[trimmedHigh.Length - 1]),
          LowValue = trimmedLow.Substring(1),
          HighValue = trimmedHigh.Substring(0, trimmedHigh.Length - 1)
      };

      // Numeric bounds are re-encoded so they can be compared ordinally against index terms.
      if (RangeQueryParser.NumericRangeValue.IsMatch(parsedRange.LowValue))
      {
          parsedRange.LowValue = NumericStringToSortableNumeric(parsedRange.LowValue);
      }
      if (RangeQueryParser.NumericRangeValue.IsMatch(parsedRange.HighValue))
      {
          parsedRange.HighValue = NumericStringToSortableNumeric(parsedRange.HighValue);
      }

      // A null bound means "unbounded" to ParsedRange.IsMatch.
      if (parsedRange.LowValue == "NULL" || parsedRange.LowValue == "*")
          parsedRange.LowValue = null;
      if (parsedRange.HighValue == "NULL" || parsedRange.HighValue == "*")
          parsedRange.HighValue = null;

      parsedRange.LowValue = UnescapeValueIfNecessary(parsedRange.LowValue);
      parsedRange.HighValue = UnescapeValueIfNecessary(parsedRange.HighValue);
      return parsedRange;
  }
  /// <summary>
  /// Returns the unescaped form of <paramref name="value"/> only when that unescaped text parses
  /// as a date in <c>Default.OnlyDateTimeFormat</c>; otherwise returns the input unchanged.
  /// Null and empty inputs are returned as-is.
  /// </summary>
  private static string UnescapeValueIfNecessary(string value)
  {
      if (string.IsNullOrEmpty(value) == false)
      {
          var candidate = QueryBuilder.Unescape(value);

          DateTime ignored;
          var parsesAsDate = DateTime.TryParseExact(
              candidate,
              Default.OnlyDateTimeFormat,
              CultureInfo.InvariantCulture,
              DateTimeStyles.RoundtripKind,
              out ignored);

          return parsesAsDate ? candidate : value;
      }

      return value;
  }
  /// <summary>
  /// Parses <paramref name="value"/> with <c>NumberUtil.StringToNumber</c> and encodes the result
  /// with the <c>NumericUtils.*ToPrefixCoded</c> method matching its runtime type.
  /// </summary>
  /// <exception cref="ArgumentException">The parsed number is not an int, long, float or double.</exception>
  private static string NumericStringToSortableNumeric(string value)
  {
      var parsed = NumberUtil.StringToNumber(value);

      // The runtime types are mutually exclusive, so the order of the checks is irrelevant.
      if (parsed is double)
          return NumericUtils.DoubleToPrefixCoded((double)parsed);
      if (parsed is float)
          return NumericUtils.FloatToPrefixCoded((float)parsed);
      if (parsed is long)
          return NumericUtils.LongToPrefixCoded((long)parsed);
      if (parsed is int)
          return NumericUtils.IntToPrefixCoded((int)parsed);

      throw new ArgumentException("Unknown type for " + parsed.GetType() + " which started as " + value);
  }
  /// <summary>
  /// Maps a range bracket to its inclusiveness: square brackets are inclusive, curly braces exclusive.
  /// </summary>
  /// <exception cref="ArgumentException"><paramref name="ch"/> is not one of '[', ']', '{', '}'.</exception>
  private static bool IsInclusive(char ch)
  {
      if (ch == '[' || ch == ']')
          return true;

      if (ch == '{' || ch == '}')
          return false;

      throw new ArgumentException("Could not understand range prefix: " + ch);
  }
  /// <summary>
  /// A parsed facet range: optional low/high bounds (null means unbounded) plus their inclusiveness.
  /// </summary>
  private class ParsedRange
  {
      public bool LowInclusive;
      public bool HighInclusive;
      public string LowValue;
      public string HighValue;
      public string RangeText;
      public string Field;

      /// <summary>
      /// Returns true when <paramref name="value"/> lies within the range, using ordinal string comparison.
      /// </summary>
      public bool IsMatch(string value)
      {
          if (LowValue != null)
          {
              var versusLow = string.CompareOrdinal(value, LowValue);
              if (versusLow < 0)
                  return false;
              // Sitting exactly on an exclusive lower edge is not a match.
              if (versusLow == 0 && !LowInclusive)
                  return false;
          }

          if (HighValue != null)
          {
              var versusHigh = string.CompareOrdinal(value, HighValue);
              if (versusHigh > 0)
                  return false;
              // Sitting exactly on an exclusive upper edge is not a match.
              if (versusHigh == 0 && !HighInclusive)
                  return false;
          }

          return true;
      }

      /// <summary>Renders the range as "Field:RangeText".</summary>
      public override string ToString()
      {
          return string.Concat(Field, ":", RangeText);
      }
  }
  180. private class QueryForFacets
  181. {
  // Per facet value: the documents that hit it, recorded only for facets that need
  // an aggregation beyond a plain count (see ApplyFacetValueHit).
  private readonly Dictionary<FacetValue, FacetValueState> matches = new Dictionary<FacetValue, FacetValueState>();

  // Definition of the queried index; consulted for per-field sort options.
  private readonly IndexDefinition indexDefinition;

  /// <summary>
  /// Captures everything needed to execute one faceted query.
  /// </summary>
  /// <param name="database">Database that owns the index.</param>
  /// <param name="index">Name of the index to query.</param>
  /// <param name="facets">All facets, keyed by result key.</param>
  /// <param name="ranges">Parsed ranges of the range-mode facets, keyed by result key.</param>
  /// <param name="indexQuery">Base query to facet over.</param>
  /// <param name="results">Result object that Execute fills in.</param>
  /// <param name="start">Number of leading terms to skip for default-mode facets.</param>
  /// <param name="pageSize">Optional cap on returned terms per default-mode facet.</param>
  public QueryForFacets(
      DocumentDatabase database,
      string index,
      Dictionary<string, Facet> facets,
      Dictionary<string, List<ParsedRange>> ranges,
      IndexQuery indexQuery,
      FacetResults results,
      int start,
      int? pageSize)
  {
      this.Database = database;
      this.Index = index;
      this.IndexQuery = indexQuery;

      this.Facets = facets;
      this.Ranges = ranges;
      this.Results = results;

      this.Start = start;
      this.PageSize = pageSize;

      indexDefinition = database.IndexDefinitionStorage.GetIndexDefinition(index);
  }

  private DocumentDatabase Database { get; set; }
  private string Index { get; set; }
  private Dictionary<string, Facet> Facets { get; set; }
  private Dictionary<string, List<ParsedRange>> Ranges { get; set; }
  private IndexQuery IndexQuery { get; set; }
  private FacetResults Results { get; set; }
  private int Start { get; set; }
  private int? PageSize { get; set; }
  /// <summary>
  /// Runs the base query once, reads the terms of every faceted field for the matching
  /// documents, tallies hits per facet value, and then completes the aggregations.
  /// </summary>
  public void Execute()
  {
      ValidateFacets();

      // We only want to run the base query once, so we capture all of the facet-ing terms, then run
      // the query once through the collector and pull out all of the terms in one shot.
      var allCollector = new GatherAllCollector();
      var facetsByName = new Dictionary<string, Dictionary<string, FacetValue>>();

      using (var currentState = Database.IndexStorage.GetCurrentStateHolder(Index))
      {
          var currentIndexSearcher = currentState.IndexSearcher;
          var baseQuery = Database.IndexStorage.GetDocumentQuery(Index, IndexQuery, Database.IndexQueryTriggers);
          currentIndexSearcher.Search(baseQuery, allCollector);

          // Facets holds every facet (range-mode ones included), so their Names are the complete
          // set of index fields to read. The keys of Ranges are result keys (display names),
          // not field names, and must not be passed as fields.
          var fieldsToRead = new HashSet<string>(Facets.Values.Select(x => x.Name));

          IndexedTerms.ReadEntriesForFields(currentState,
              fieldsToRead,
              allCollector.Documents,
              (term, doc) =>
              {
                  var facets = Facets.Values.Where(facet => facet.Name == term.Field);
                  foreach (var facet in facets)
                  {
                      switch (facet.Mode)
                      {
                          case FacetMode.Default:
                              var facetValues = facetsByName.GetOrAdd(facet.DisplayName);
                              FacetValue existing;
                              if (facetValues.TryGetValue(term.Text, out existing) == false)
                              {
                                  existing = new FacetValue
                                  {
                                      Range = GetRangeName(term)
                                  };
                                  facetValues[term.Text] = existing;
                              }
                              ApplyFacetValueHit(existing, facet, doc, null);
                              break;
                          case FacetMode.Ranges:
                              // Ranges and Results.Results are keyed by the facet's result key
                              // (display name, falling back to the field name), not by the index
                              // field. Looking them up by term.Field silently dropped every hit
                              // of a range facet whose display name differs from its field name.
                              var rangeKey = string.IsNullOrWhiteSpace(facet.DisplayName) ? facet.Name : facet.DisplayName;
                              List<ParsedRange> list;
                              if (Ranges.TryGetValue(rangeKey, out list))
                              {
                                  var rangeValues = Results.Results[rangeKey].Values;
                                  for (int i = 0; i < list.Count; i++)
                                  {
                                      var parsedRange = list[i];
                                      if (parsedRange.IsMatch(term.Text))
                                      {
                                          // rangeValues[i] corresponds to list[i]: both are built
                                          // from facet.Ranges in the same order.
                                          ApplyFacetValueHit(rangeValues[i], facet, doc, parsedRange);
                                      }
                                  }
                              }
                              break;
                          default:
                              throw new ArgumentOutOfRangeException();
                      }
                  }
              });

          UpdateFacetResults(facetsByName);
          CompleteFacetCalculationsStage1(currentState);
          CompleteFacetCalculationsStage2();
      }
  }
  /// <summary>
  /// Throws when a facet asks for a numerical aggregation of a numerical type over a field
  /// for which the index has no sort option configured.
  /// </summary>
  /// <exception cref="InvalidOperationException">The first offending facet found.</exception>
  private void ValidateFacets()
  {
      foreach (var entry in Facets)
      {
          var candidate = entry.Value;

          if (IsAggregationNumerical(candidate.Aggregation) == false)
              continue;
          if (IsAggregationTypeNumerical(candidate.AggregationType) == false)
              continue;
          if (GetSortOptionsForFacet(candidate.AggregationField) != SortOptions.None)
              continue;

          throw new InvalidOperationException(string.Format("Index '{0}' does not have sorting enabled for a numerical field '{1}'.", this.Index, candidate.AggregationField));
      }
  }
  /// <summary>
  /// Returns true when <paramref name="aggregation"/> is exactly one of Average, Count, Max, Min or Sum.
  /// Any other value, including None, yields false.
  /// </summary>
  private static bool IsAggregationNumerical(FacetAggregation aggregation)
  {
      return aggregation == FacetAggregation.Average
          || aggregation == FacetAggregation.Count
          || aggregation == FacetAggregation.Max
          || aggregation == FacetAggregation.Min
          || aggregation == FacetAggregation.Sum;
  }
  /// <summary>
  /// Returns true when <paramref name="aggregationType"/> names one of the numeric CLR types
  /// decimal, int, long, short, float or double (type lookup ignores case).
  /// </summary>
  /// <param name="aggregationType">Type name as understood by <see cref="Type.GetType(string, bool, bool)"/>; may be null or empty.</param>
  /// <returns>False for a null/blank name, an unresolvable name, or a non-numeric type.</returns>
  public static bool IsAggregationTypeNumerical(string aggregationType)
  {
      // Type.GetType throws ArgumentNullException for a null name even with throwOnError = false;
      // a facet that simply did not set AggregationType is "not numerical", not an error.
      if (string.IsNullOrWhiteSpace(aggregationType))
          return false;

      var type = Type.GetType(aggregationType, false, true);
      if (type == null)
          return false;

      return type == typeof(decimal)
          || type == typeof(int)
          || type == typeof(long)
          || type == typeof(short)
          || type == typeof(float)
          || type == typeof(double);
  }
  /// <summary>
  /// Produces the display text for a term according to the sort option of its field.
  /// String/None/Custom/StringVal fields return the term text; Int/Long/Double/Float fields
  /// return the text when IsStringNumber accepts it, otherwise the decoded value formatted
  /// with the invariant culture.
  /// </summary>
  /// <exception cref="ArgumentException">The field uses any other sort option (e.g. Byte, Short).</exception>
  private string GetRangeName(Term term)
  {
      var sortOptions = GetSortOptionsForFacet(term.Field);

      var isTextual = sortOptions == SortOptions.String
          || sortOptions == SortOptions.None
          || sortOptions == SortOptions.Custom
          || sortOptions == SortOptions.StringVal;
      if (isTextual)
          return term.Text;

      var isNumeric = sortOptions == SortOptions.Int
          || sortOptions == SortOptions.Long
          || sortOptions == SortOptions.Double
          || sortOptions == SortOptions.Float;
      if (isNumeric == false)
          throw new ArgumentException("Can't get range name from sort option" + sortOptions);

      // Terms that IsStringNumber accepts are returned verbatim for every numeric sort option.
      if (IsStringNumber(term))
          return term.Text;

      if (sortOptions == SortOptions.Int)
          return NumericUtils.PrefixCodedToInt(term.Text).ToString(CultureInfo.InvariantCulture);
      if (sortOptions == SortOptions.Long)
          return NumericUtils.PrefixCodedToLong(term.Text).ToString(CultureInfo.InvariantCulture);
      if (sortOptions == SortOptions.Double)
          return NumericUtils.PrefixCodedToDouble(term.Text).ToString(CultureInfo.InvariantCulture);

      return NumericUtils.PrefixCodedToFloat(term.Text).ToString(CultureInfo.InvariantCulture);
  }
  /// <summary>
  /// Returns true when the term has non-empty text whose first character is a digit.
  /// </summary>
  private bool IsStringNumber(Term term)
  {
      var hasText = term != null && string.IsNullOrEmpty(term.Text) == false;
      return hasText && char.IsDigit(term.Text, 0);
  }
  /// <summary>
  /// Finalizes per-value figures after all hits and aggregations were accumulated:
  /// copies Hits into Count for Count facets and turns the accumulated Average sum
  /// into a mean (NaN when the value had no hits).
  /// </summary>
  private void CompleteFacetCalculationsStage2()
  {
      foreach (var facetResult in Results.Results)
      {
          // Results and Facets share the same key (display name, falling back to the field
          // name). Matching on DisplayName == key missed facets whose key fell back to Name,
          // leaving their Count/Average unfinalized.
          Facet facet;
          if (Facets.TryGetValue(facetResult.Key, out facet) == false)
              continue;

          var wantsCount = facet.Aggregation.HasFlag(FacetAggregation.Count);
          var wantsAverage = facet.Aggregation.HasFlag(FacetAggregation.Average);
          if (wantsCount == false && wantsAverage == false)
              continue;

          foreach (var facetValue in facetResult.Value.Values)
          {
              if (wantsCount)
                  facetValue.Count = facetValue.Hits;

              if (wantsAverage)
              {
                  // Up to this point Average holds the running sum written by ApplyAggregation.
                  if (facetValue.Hits == 0)
                      facetValue.Average = double.NaN;
                  else
                      facetValue.Average = facetValue.Average / facetValue.Hits;
              }
          }
      }
  }
  /// <summary>
  /// Reads the aggregation fields of every document recorded in <c>matches</c> and feeds each
  /// value into the facet values that document hit.
  /// </summary>
  /// <param name="state">Index searcher state to read the terms from.</param>
  private void CompleteFacetCalculationsStage1(IndexSearcherHolder.IndexSearcherHoldingState state)
  {
      // Only facets with an aggregation other than None/Count contribute a field to read.
      var fieldsToRead = new HashSet<string>();
      foreach (var declared in Facets.Values)
      {
          if (declared.Aggregation == FacetAggregation.None || declared.Aggregation == FacetAggregation.Count)
              continue;
          if (declared.AggregationField != null)
              fieldsToRead.Add(declared.AggregationField);
      }

      if (fieldsToRead.Count == 0)
          return;

      var allDocs = new HashSet<int>();
      foreach (var tracked in matches.Values)
          allDocs.UnionWith(tracked.Docs);

      IndexedTerms.ReadEntriesForFields(state, fieldsToRead, allDocs, GetValueFromIndex, (term, currentVal, docId) =>
      {
          foreach (var match in matches)
          {
              var hitState = match.Value;
              if (hitState.Docs.Contains(docId) == false)
                  continue;

              var owner = hitState.Facet;
              if (term.Field != owner.AggregationField)
                  continue;

              if (owner.Mode == FacetMode.Default)
              {
                  ApplyAggregation(owner, match.Key, currentVal);
              }
              else if (owner.Mode == FacetMode.Ranges)
              {
                  // For range facets the aggregated term itself must fall inside the range.
                  if (hitState.Range.IsMatch(term.Text))
                      ApplyAggregation(owner, match.Key, currentVal);
              }
              else
              {
                  throw new ArgumentOutOfRangeException();
              }
          }
      });
  }
  /// <summary>
  /// Folds <paramref name="currentVal"/> into the Max/Min/Sum/Average figures of
  /// <paramref name="value"/> for each aggregation flag set on <paramref name="facet"/>.
  /// Average accumulates a running sum here; the division happens in a later stage.
  /// </summary>
  private void ApplyAggregation(Facet facet, FacetValue value, double currentVal)
  {
      var requested = facet.Aggregation;

      if (requested.HasFlag(FacetAggregation.Max))
      {
          var highestSoFar = value.Max ?? Double.MinValue;
          value.Max = Math.Max(highestSoFar, currentVal);
      }

      if (requested.HasFlag(FacetAggregation.Min))
      {
          var lowestSoFar = value.Min ?? Double.MaxValue;
          value.Min = Math.Min(lowestSoFar, currentVal);
      }

      if (requested.HasFlag(FacetAggregation.Sum))
      {
          var sumSoFar = value.Sum ?? 0d;
          value.Sum = currentVal + sumSoFar;
      }

      if (requested.HasFlag(FacetAggregation.Average))
      {
          var totalSoFar = value.Average ?? 0d;
          value.Average = currentVal + totalSoFar;
      }
  }
  /// <summary>
  /// Decodes the numeric value of <paramref name="term"/> according to the sort option of its field.
  /// </summary>
  /// <exception cref="InvalidOperationException">The field's sort option is not Int, Float, Long or Double.</exception>
  /// <exception cref="ArgumentOutOfRangeException">The sort option is not a known value.</exception>
  private double GetValueFromIndex(Term term)
  {
      var sortOption = GetSortOptionsForFacet(term.Field);
      switch (sortOption)
      {
          case SortOptions.Int:
              return NumericUtils.PrefixCodedToInt(term.Text);
          case SortOptions.Long:
              return NumericUtils.PrefixCodedToLong(term.Text);
          case SortOptions.Float:
              return NumericUtils.PrefixCodedToFloat(term.Text);
          case SortOptions.Double:
              return NumericUtils.PrefixCodedToDouble(term.Text);

          case SortOptions.None:
          case SortOptions.String:
          case SortOptions.StringVal:
          case SortOptions.Custom:
          case SortOptions.Byte:
          case SortOptions.Short:
              // Report the field under its plain name, without the "_Range" suffix.
              var plainFieldName = TryTrimRangeSuffix(term.Field);
              throw new InvalidOperationException(string.Format("Cannot perform numeric aggregation on index field '{0}'. You must set the Sort mode of the field to Int, Float, Long or Double.", plainFieldName));

          default:
              throw new ArgumentOutOfRangeException();
      }
  }
  // Memoized sort options per field name; filled and consulted by GetSortOptionsForFacet.
  private readonly Dictionary<string, SortOptions> cache = new Dictionary<string, SortOptions>();

  /// <summary>
  /// Resolves the sort option configured for <paramref name="field"/> in the index definition.
  /// If the field itself has none and its name ends with "_Range", the name without that
  /// suffix is tried; otherwise <c>SortOptions.None</c> is returned.
  /// </summary>
  /// <param name="field">Index field name.</param>
  private SortOptions GetSortOptionsForFacet(string field)
  {
      // This method runs once per term read from the index; the cache used to be written
      // but never read, so every call repeated the lookups below.
      // NOTE(review): like the original unsynchronised writes, this assumes the term
      // callbacks are invoked on a single thread - confirm against IndexedTerms.
      SortOptions value;
      if (cache.TryGetValue(field, out value))
          return value;

      if (indexDefinition.SortOptions.TryGetValue(field, out value) == false)
      {
          const string rangeSuffix = "_Range";

          // Field names are identifiers: compare the suffix ordinally, not by culture.
          if (field.EndsWith(rangeSuffix, StringComparison.Ordinal))
          {
              var fieldWithNoRange = field.Substring(0, field.Length - rangeSuffix.Length);
              if (indexDefinition.SortOptions.TryGetValue(fieldWithNoRange, out value) == false)
                  value = SortOptions.None;
          }
          else
          {
              value = SortOptions.None;
          }
      }

      cache[field] = value;
      return value;
  }
  /// <summary>
  /// Returns <paramref name="fieldName"/> without a trailing "_Range", or unchanged when it has none.
  /// </summary>
  private static string TryTrimRangeSuffix(string fieldName)
  {
      const string rangeSuffix = "_Range";

      // Ordinal comparison: a culture-sensitive EndsWith can report a match where the ordinal
      // suffix is absent (e.g. trailing ignorable characters), and the fixed-length Substring
      // would then cut off the wrong characters.
      return fieldName.EndsWith(rangeSuffix, StringComparison.Ordinal)
          ? fieldName.Substring(0, fieldName.Length - rangeSuffix.Length)
          : fieldName;
  }
  /// <summary>
  /// Registers one hit on <paramref name="facetValue"/>. For facets whose aggregation is something
  /// other than Count or None, also records <paramref name="docId"/> against that facet value in
  /// <c>matches</c>.
  /// </summary>
  /// <param name="facetValue">Facet value that was hit.</param>
  /// <param name="value">Facet the value belongs to.</param>
  /// <param name="docId">Document that produced the hit.</param>
  /// <param name="parsedRange">Matching range for range-mode facets; null for default mode.</param>
  private void ApplyFacetValueHit(FacetValue facetValue, Facet value, int docId, ParsedRange parsedRange)
  {
      facetValue.Hits++;

      var countOnly = value.Aggregation == FacetAggregation.Count || value.Aggregation == FacetAggregation.None;
      if (countOnly)
          return;

      FacetValueState tracked;
      if (matches.TryGetValue(facetValue, out tracked) == false)
      {
          tracked = new FacetValueState
          {
              Docs = new HashSet<int>(),
              Facet = value,
              Range = parsedRange
          };
          matches.Add(facetValue, tracked);
      }

      tracked.Docs.Add(docId);
  }
  /// <summary>
  /// Bookkeeping for one facet value that needs an aggregation pass.
  /// </summary>
  private class FacetValueState
  {
      // Facet that produced the value.
      public Facet Facet;

      // Range that matched for range-mode facets; null for default-mode facets.
      public ParsedRange Range;

      // Ids of the documents that hit the value.
      public HashSet<int> Docs;
  }
  /// <summary>
  /// Builds the paged result of every default-mode facet from the collected term values:
  /// sorts the terms per the facet's TermSortMode, applies Start / page size, and records
  /// how many terms and hits remain after the returned page.
  /// </summary>
  /// <param name="facetsByName">Collected facet values per facet display name, keyed by term text.</param>
  /// <exception cref="ArgumentException">A facet has an unknown TermSortMode.</exception>
  private void UpdateFacetResults(Dictionary<string, Dictionary<string, FacetValue>> facetsByName)
  {
      foreach (var facet in Facets.Values)
      {
          if (facet.Mode == FacetMode.Ranges)
              continue;

          // The configured MaxPageSize is both the default and the upper bound.
          int maxResults = Math.Min(PageSize ?? facet.MaxResults ?? Database.Configuration.MaxPageSize, Database.Configuration.MaxPageSize);

          var groups = facetsByName.GetOrDefault(facet.DisplayName);
          if (groups == null)
              continue;

          List<string> allTerms;
          switch (facet.TermSortMode)
          {
              case FacetTermSortMode.ValueAsc:
                  allTerms = new List<string>(groups.OrderBy(x => x.Key).ThenBy(x => x.Value.Hits).Select(x => x.Key));
                  break;
              case FacetTermSortMode.ValueDesc:
                  allTerms = new List<string>(groups.OrderByDescending(x => x.Key).ThenBy(x => x.Value.Hits).Select(x => x.Key));
                  break;
              case FacetTermSortMode.HitsAsc:
                  allTerms = new List<string>(groups.OrderBy(x => x.Value.Hits).ThenBy(x => x.Key).Select(x => x.Key));
                  break;
              case FacetTermSortMode.HitsDesc:
                  allTerms = new List<string>(groups.OrderByDescending(x => x.Value.Hits).ThenBy(x => x.Key).Select(x => x.Key));
                  break;
              default:
                  throw new ArgumentException(string.Format("Could not understand '{0}'", facet.TermSortMode));
          }

          // Number of terms actually skipped. Clamping keeps the "remaining" figures sane
          // when Start is negative or larger than the number of terms (previously
          // RemainingTermsCount could go negative).
          var skipped = Math.Min(Math.Max(Start, 0), allTerms.Count);

          var values = allTerms
              .Skip(skipped)
              .Take(maxResults)
              .Select(term => groups.GetOrDefault(term) ?? new FacetValue { Range = term })
              .ToList();

          var previousHits = allTerms.Take(skipped).Sum(allTerm =>
          {
              var facetValue = groups.GetOrDefault(allTerm);
              return facetValue == null ? 0 : facetValue.Hits;
          });

          var key = string.IsNullOrWhiteSpace(facet.DisplayName) ? facet.Name : facet.DisplayName;

          Results.Results[key] = new FacetResult
          {
              Values = values,
              RemainingTermsCount = allTerms.Count - (skipped + values.Count),
              RemainingHits = groups.Values.Sum(x => x.Hits) - (previousHits + values.Sum(x => x.Hits)),
          };

          if (facet.IncludeRemainingTerms)
              Results.Results[key].RemainingTerms = allTerms.Skip(skipped + values.Count).ToList();
      }
  }
  552. }
  553. }
  554. }