PageRenderTime 69ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/Raven.Database/Indexing/IndexedTerms.cs

https://github.com/kairogyn/ravendb
C# | 301 lines | 265 code | 31 blank | 5 comment | 41 complexity | 5124debbd04f0d7c86aa0bc95ecc4a7e MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. // -----------------------------------------------------------------------
  2. // <copyright file="CachedIndexedTerms.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. // -----------------------------------------------------------------------
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Linq;
  9. using System.Threading.Tasks;
  10. using Lucene.Net.Index;
  11. using Lucene.Net.Util;
  12. using Raven.Imports.Newtonsoft.Json.Linq;
  13. using Raven.Json.Linq;
  14. namespace Raven.Database.Indexing
  15. {
  16. public static class IndexedTerms
  17. {
  /// <summary>
  /// Walks the term vectors of every requested document/field pair and reports each term,
  /// together with its converted numeric value, to <paramref name="onTermFound"/>.
  /// </summary>
  /// <param name="state">Searcher state whose index reader is queried for term vectors.</param>
  /// <param name="fieldsToRead">Names of the fields to inspect.</param>
  /// <param name="docIds">Ids of the documents to inspect.</param>
  /// <param name="convert">Called with (field, term) to produce the numeric value of a term.</param>
  /// <param name="onTermFound">Called with (field, term, converted value, document id) for every reported term.</param>
  public static void ReadEntriesForFieldsFromTermVectors(
      IndexSearcherHolder.IndexSearcherHoldingState state,
      HashSet<string> fieldsToRead,
      HashSet<int> docIds,
      Func<string,string, double> convert,
      Action<string, string, double, int> onTermFound)
  {
      var indexReader = state.IndexSearcher.IndexReader;
      foreach (var documentId in docIds)
      {
          foreach (var fieldName in fieldsToRead)
          {
              var vector = indexReader.GetTermFreqVector(documentId, fieldName);
              if (vector != null)
              {
                  foreach (var termText in vector.GetTerms())
                  {
                      // Terms rejected by LowPrecisionNumber are not reported.
                      if (LowPrecisionNumber(fieldName, termText) == false)
                      {
                          var numericValue = convert(fieldName, termText);
                          onTermFound(fieldName, termText, numericValue, documentId);
                      }
                  }
              }
          }
      }
  }
  /// <summary>
  /// Reads the cached terms of every requested field/document pair and reports each one,
  /// together with its converted numeric value, to <paramref name="onTermFound"/>.
  /// Fields that are not cached yet are loaded into the cache first.
  /// </summary>
  /// <param name="state">Searcher state that owns the index reader, the term cache and its lock.</param>
  /// <param name="fieldsToRead">Names of the fields to read.</param>
  /// <param name="docIds">Ids of the documents to read.</param>
  /// <param name="convert">Called with (field, term) to produce the numeric value of a term; the result is stored on the cache entry and reused on later reads.</param>
  /// <param name="onTermFound">Called with (field, term, converted value, document id) for every cached entry.</param>
  public static void ReadEntriesForFields(
      IndexSearcherHolder.IndexSearcherHoldingState state,
      HashSet<string> fieldsToRead,
      HashSet<int> docIds,
      Func<string, string, double> convert,
      Action<string, string, double, int> onTermFound)
  {
      var reader = state.IndexSearcher.IndexReader;
      state.Lock.EnterReadLock();
      try
      {
          EnsureFieldsAreInCache(state, fieldsToRead, reader);
          foreach (var field in fieldsToRead)
          {
              foreach (var docId in docIds)
              {
                  foreach (var val in state.GetFromCache(field, docId))
                  {
                      double converted;
                      if (val.Val == null)
                      {
                          // No converted value stored yet: convert now and remember it on the cache entry.
                          val.Val = converted = convert(val.Term.Field, val.Term.Text);
                      }
                      else
                      {
                          converted = val.Val.Value;
                      }
                      onTermFound(val.Term.Field, val.Term.Text, converted, docId);
                  }
              }
          }
      }
      finally
      {
          // EnsureFieldsAreInCache temporarily gives up the read lock; if it throws before
          // re-acquiring it, there is nothing to release here.
          if (state.Lock.IsReadLockHeld)
              state.Lock.ExitReadLock();
      }
  }
  /// <summary>
  /// Walks the term vectors of every requested document/field pair and reports each term
  /// to <paramref name="onTermFound"/>.
  /// </summary>
  /// <param name="state">Searcher state whose index reader is queried for term vectors.</param>
  /// <param name="fieldsToRead">Names of the fields to inspect.</param>
  /// <param name="docIds">Ids of the documents to inspect.</param>
  /// <param name="onTermFound">Called with (field, term, document id) for every reported term.</param>
  public static void ReadEntriesForFieldsFromTermVectors(
      IndexSearcherHolder.IndexSearcherHoldingState state,
      HashSet<string> fieldsToRead,
      HashSet<int> docIds,
      Action<string,string, int> onTermFound)
  {
      var indexReader = state.IndexSearcher.IndexReader;
      foreach (var documentId in docIds)
      {
          foreach (var fieldName in fieldsToRead)
          {
              var vector = indexReader.GetTermFreqVector(documentId, fieldName);
              if (vector != null)
              {
                  foreach (var termText in vector.GetTerms())
                  {
                      // Terms rejected by LowPrecisionNumber are not reported.
                      if (LowPrecisionNumber(fieldName, termText) == false)
                      {
                          onTermFound(fieldName, termText, documentId);
                      }
                  }
              }
          }
      }
  }
  /// <summary>
  /// Reads the cached terms of every requested field/document pair and reports each one
  /// to <paramref name="onTermFound"/>. Fields that are not cached yet are loaded first.
  /// </summary>
  /// <param name="state">Searcher state that owns the index reader, the term cache and its lock.</param>
  /// <param name="fieldsToRead">Names of the fields to read.</param>
  /// <param name="docIds">Ids of the documents to read.</param>
  /// <param name="onTermFound">Called with (field, term text, document id) for every cached term.</param>
  public static void ReadEntriesForFields(
      IndexSearcherHolder.IndexSearcherHoldingState state,
      HashSet<string> fieldsToRead,
      HashSet<int> docIds,
      Action<string, string, int> onTermFound)
  {
      var indexReader = state.IndexSearcher.IndexReader;

      state.Lock.EnterReadLock();
      try
      {
          EnsureFieldsAreInCache(state, fieldsToRead, indexReader);

          foreach (var fieldName in fieldsToRead)
          {
              foreach (var documentId in docIds)
              {
                  var cachedTerms = state.GetTermsFromCache(fieldName, documentId);
                  foreach (var cachedTerm in cachedTerms)
                  {
                      onTermFound(cachedTerm.Field, cachedTerm.Text, documentId);
                  }
              }
          }
      }
      finally
      {
          // The read lock may have been given up inside EnsureFieldsAreInCache if it threw.
          if (state.Lock.IsReadLockHeld)
          {
              state.Lock.ExitReadLock();
          }
      }
  }
  /// <summary>
  /// Loads the given fields into the term cache while holding the write lock,
  /// unless every one of them is already cached.
  /// </summary>
  /// <param name="state">Searcher state that owns the term cache and its lock.</param>
  /// <param name="fieldsToRead">Names of the fields to cache.</param>
  /// <param name="reader">Index reader the terms are read from.</param>
  public static void PreFillCache(IndexSearcherHolder.IndexSearcherHoldingState state, string[] fieldsToRead,
      IndexReader reader)
  {
      state.Lock.EnterWriteLock();
      try
      {
          var everythingCached = fieldsToRead.All(state.IsInCache);
          if (everythingCached == false)
          {
              // Note: the whole requested set is passed on, not only the missing fields.
              FillCache(state, fieldsToRead, reader);
          }
      }
      finally
      {
          state.Lock.ExitWriteLock();
      }
  }
  /// <summary>
  /// Makes sure every requested field is present in the term cache.
  /// Must be called while the current thread holds the read lock on <c>state.Lock</c>:
  /// when something is missing, the read lock is released, the write lock is taken to fill
  /// the cache, and the read lock is re-acquired before returning.
  /// </summary>
  /// <param name="state">Searcher state that owns the term cache and its lock.</param>
  /// <param name="fieldsToRead">Names of the fields that must be cached.</param>
  /// <param name="reader">Index reader the missing terms are read from.</param>
  /// <remarks>
  /// If filling the cache throws, the method exits with neither lock held; callers
  /// therefore check <c>IsReadLockHeld</c> before releasing the read lock.
  /// </remarks>
  private static void EnsureFieldsAreInCache(IndexSearcherHolder.IndexSearcherHoldingState state, HashSet<string> fieldsToRead, IndexReader reader)
  {
      if (fieldsToRead.All(state.IsInCache))
          return;

      state.Lock.ExitReadLock();
      state.Lock.EnterWriteLock();
      try
      {
          // Re-evaluated under the write lock so the list reflects the cache as it is now.
          var fieldsNotInCache = fieldsToRead.Where(field => state.IsInCache(field) == false).ToList();
          // Check the list of missing fields (the original tested fieldsToRead by mistake).
          if (fieldsNotInCache.Count > 0)
              FillCache(state, fieldsNotInCache, reader);
      }
      finally
      {
          state.Lock.ExitWriteLock();
      }
      state.Lock.EnterReadLock();
  }
  /// <summary>
  /// Builds, for each given field, a per-document list of the field's terms and stores it in the cache.
  /// </summary>
  /// <param name="state">Searcher state that receives the cache entries via <c>SetInCache</c>.</param>
  /// <param name="fieldsToRead">Names of the fields to load.</param>
  /// <param name="reader">Index reader the terms and their documents are enumerated from.</param>
  private static void FillCache(IndexSearcherHolder.IndexSearcherHoldingState state, IEnumerable<string> fieldsToRead,IndexReader reader)
  {
      foreach (var field in fieldsToRead)
      {
          // One slot per document number; a slot stays null until a term is found for that document.
          var perDocument = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>[reader.MaxDoc];

          using (var termDocs = reader.TermDocs())
          using (var termEnum = reader.Terms(new Term(field)))
          {
              do
              {
                  var current = termEnum.Term;
                  // Stop once the enumeration is exhausted or has moved on to another field.
                  if (current == null || field != current.Field)
                      break;

                  if (LowPrecisionNumber(current.Field, current.Text) == false)
                  {
                      var remaining = termEnum.DocFreq();
                      termDocs.Seek(current);
                      while (termDocs.Next() && remaining > 0)
                      {
                          remaining -= 1;
                          if (reader.IsDeleted(termDocs.Doc))
                              continue;

                          var bucket = perDocument[termDocs.Doc];
                          if (bucket == null)
                          {
                              bucket = new LinkedList<IndexSearcherHolder.IndexSearcherHoldingState.CacheVal>();
                              perDocument[termDocs.Doc] = bucket;
                          }
                          bucket.AddLast(new IndexSearcherHolder.IndexSearcherHoldingState.CacheVal
                          {
                              Term = current
                          });
                      }
                  }
              } while (termEnum.Next());
          }

          state.SetInCache(field, perDocument);
      }
  }
  /// <summary>
  /// Tells whether a term of a "_Range" field should be skipped.
  /// </summary>
  /// <param name="field">Field name; only names ending in "_Range" are ever filtered.</param>
  /// <param name="val">Term text to examine.</param>
  /// <returns>
  /// <c>true</c> when <paramref name="field"/> ends with "_Range", <paramref name="val"/> is non-empty
  /// and its first character is neither <c>NumericUtils.SHIFT_START_INT</c> nor
  /// <c>NumericUtils.SHIFT_START_LONG</c>; otherwise <c>false</c>.
  /// </returns>
  private static bool LowPrecisionNumber(string field, string val)
  {
      // Field names are identifiers, so compare ordinally rather than with the current culture.
      if (field.EndsWith("_Range", StringComparison.Ordinal) == false)
          return false;
      if (string.IsNullOrEmpty(val))
          return false;
      // NOTE(review): presumably a different leading character marks a reduced-precision
      // numeric term - confirm against Lucene's NumericUtils.
      return val[0] - NumericUtils.SHIFT_START_INT != 0 &&
             val[0] - NumericUtils.SHIFT_START_LONG != 0;
  }
  /// <summary>
  /// Rebuilds one JSON object per document number from the terms stored in the index.
  /// A field seen once is stored as a string; a field seen again is turned into (or appended to) an array.
  /// </summary>
  /// <param name="reader">Index reader to enumerate.</param>
  /// <returns>
  /// An array of length <c>reader.MaxDoc</c>, indexed by document number; slots for which
  /// no term was encountered remain <c>null</c>.
  /// </returns>
  /// <exception cref="InvalidOperationException">The reader reports more than 128 * 1024 documents.</exception>
  /// <exception cref="ArgumentException">An existing property value is neither a string nor an array.</exception>
  public static RavenJObject[] ReadAllEntriesFromIndex(IndexReader reader)
  {
      if (reader.MaxDoc > 128 * 1024)
      {
          throw new InvalidOperationException("Refusing to extract all index entires from an index with " + reader.MaxDoc +
                                              " entries, because of the probable time / memory costs associated with that." +
                                              Environment.NewLine +
                                              "Viewing Index Entries are a debug tool, and should not be used on indexes of this size. You might want to try Luke, instead.");
      }

      var entries = new RavenJObject[reader.MaxDoc];
      using (var termDocs = reader.TermDocs())
      using (var termEnum = reader.Terms())
      {
          while (termEnum.Next())
          {
              var currentTerm = termEnum.Term;
              if (currentTerm == null)
                  break;

              var termText = currentTerm.Text;
              var fieldName = currentTerm.Field;

              termDocs.Seek(termEnum);
              for (int visited = 0; visited < termEnum.DocFreq() && termDocs.Next(); visited++)
              {
                  // The entry is created before the suffix check, so a document that only has
                  // skipped fields still gets an (empty) object.
                  var entry = entries[termDocs.Doc];
                  if (entry == null)
                  {
                      entry = new RavenJObject();
                      entries[termDocs.Doc] = entry;
                  }

                  if (fieldName.EndsWith("_ConvertToJson") ||
                      fieldName.EndsWith("_IsArray"))
                      continue;

                  if (entry.ContainsKey(fieldName) == false)
                  {
                      entry[fieldName] = termText;
                      continue;
                  }

                  var existing = entry[fieldName];
                  if (existing.Type == JTokenType.Array)
                  {
                      ((RavenJArray)existing).Add(termText);
                  }
                  else if (existing.Type == JTokenType.String)
                  {
                      // Second value for this field: promote the single string to an array.
                      entry[fieldName] = new RavenJArray
                      {
                          existing,
                          termText
                      };
                  }
                  else
                  {
                      throw new ArgumentException("No idea how to handle " + existing.Type);
                  }
              }
          }
      }
      return entries;
  }
  269. }
  270. }