PageRenderTime 42ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Server/RavenFS/Search/IndexStorage.cs

https://github.com/nwendel/ravendb
C# | 237 lines | 218 code | 16 blank | 3 comment | 11 complexity | cda9dc2def939b16fc0830658abdc194 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Collections.Specialized;
  4. using System.Diagnostics;
  5. using System.Globalization;
  6. using System.IO;
  7. using System.Linq;
  8. using Lucene.Net.Documents;
  9. using Lucene.Net.Index;
  10. using Lucene.Net.Search;
  11. using Lucene.Net.Store;
  12. using Raven.Database.Indexing;
  13. using Raven.Json.Linq;
  14. using Raven.Database.Server.RavenFS.Extensions;
  15. using Lucene.Net.QueryParsers;
  16. namespace Raven.Database.Server.RavenFS.Search
  17. {
  /// <summary>
  /// Lucene index over RavenFS file keys and their metadata. Documents are
  /// added or replaced through Index, removed through Delete, and read back
  /// through Query and GetTermsFor.
  /// </summary>
  public class IndexStorage : IDisposable
  {
      private const string DateIndexFormat = "yyyy-MM-dd_HH-mm-ss";

      // Fields handed to RavenQueryParser as numeric fields.
      private static readonly string[] NumericIndexFields = new[] { "__size_numeric" };

      private readonly string path;
      private FSDirectory directory;
      private LowerCaseKeywordAnalyzer analyzer;
      private IndexWriter writer;

      // Serializes every operation that goes through the writer.
      private readonly object writerLock = new object();
      private readonly IndexSearcherHolder currentIndexSearcherHolder = new IndexSearcherHolder();

      /// <summary>
      /// Creates the storage for the index located at <paramref name="path"/>.
      /// Nothing is opened until Initialize is called.
      /// </summary>
      /// <param name="path">Directory that holds (or will hold) the index files.</param>
      /// <param name="_">Unused.</param>
      public IndexStorage(string path, NameValueCollection _)
      {
          this.path = path;
      }

      /// <summary>
      /// Opens the index directory (creating it when missing), clears a
      /// left-over write lock, and creates the writer and the first searcher.
      /// </summary>
      public void Initialize()
      {
          // CreateDirectory is a no-op when the directory already exists.
          System.IO.Directory.CreateDirectory(path);

          directory = FSDirectory.Open(new DirectoryInfo(path));
          if (IndexWriter.IsLocked(directory))
          {
              IndexWriter.Unlock(directory);
          }

          analyzer = new LowerCaseKeywordAnalyzer();
          writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
          writer.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());
          currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(directory, true));
      }

      /// <summary>
      /// Runs a query and returns one page of matching file keys.
      /// </summary>
      /// <param name="query">Query text; null or empty matches every document.</param>
      /// <param name="sortFields">Optional sort fields; a leading '-' reverses the order of that field.</param>
      /// <param name="start">Index of the first hit to return.</param>
      /// <param name="pageSize">Maximum number of keys to return.</param>
      /// <param name="totalResults">Total number of hits reported by the search.</param>
      /// <returns>The stored "__key" value of each hit on the requested page.</returns>
      public string[] Query(string query, string[] sortFields, int start, int pageSize, out int totalResults)
      {
          IndexSearcher searcher;
          using (GetSearcher(out searcher))
          {
              Query luceneQuery;
              if (string.IsNullOrEmpty(query))
              {
                  luceneQuery = new MatchAllDocsQuery();
              }
              else
              {
                  var queryParser = new RavenQueryParser(analyzer, NumericIndexFields);
                  luceneQuery = queryParser.Parse(query);
              }

              // Ask for everything up to the end of the requested page, then skip the first `start` hits.
              var size = pageSize + start;
              var topDocs = ExecuteQuery(searcher, sortFields, luceneQuery, size);

              // Bound the loop by the array that is actually indexed.
              var hits = topDocs.ScoreDocs;
              var end = Math.Min(size, hits.Length);

              var results = new List<string>();
              for (var i = start; i < end; i++)
              {
                  var document = searcher.Doc(hits[i].Doc);
                  results.Add(document.Get("__key"));
              }

              totalResults = topDocs.TotalHits;
              return results.ToArray();
          }
      }

      /// <summary>
      /// Executes <paramref name="q"/> for the top <paramref name="size"/> hits,
      /// sorted by <paramref name="sortFields"/> when any are given.
      /// </summary>
      private TopDocs ExecuteQuery(IndexSearcher searcher, string[] sortFields, Query q, int size)
      {
          if (sortFields == null || sortFields.Length == 0)
          {
              return searcher.Search(q, null, size);
          }

          var sort = new Sort(sortFields.Select(sortField =>
          {
              // A leading '-' is a marker, not text: compare ordinally.
              var reverse = sortField.StartsWith("-", StringComparison.Ordinal);
              var fieldName = reverse ? sortField.Substring(1) : sortField;
              return new SortField(fieldName, SortField.STRING, reverse);
          }).ToArray());

          return searcher.Search(q, null, size, sort);
      }

      /// <summary>
      /// Adds the file to the index, replacing any document previously indexed
      /// under the same (lower-cased) key, then commits and refreshes the searcher.
      /// </summary>
      /// <param name="key">File key; lower-cased with the invariant culture before indexing.</param>
      /// <param name="metadata">Metadata whose entries are each indexed as a field named after the entry key.</param>
      public virtual void Index(string key, RavenJObject metadata)
      {
          lock (writerLock)
          {
              var lowerKey = key.ToLowerInvariant();
              var doc = CreateDocument(lowerKey, metadata);

              foreach (KeyValuePair<string, RavenJToken> entry in metadata)
              {
                  doc.Add(new Field(entry.Key, entry.Value.ToString(), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
              }

              writer.DeleteDocuments(new Term("__key", lowerKey));
              writer.AddDocument(doc);
              // yes, this is slow, but we aren't expecting high writes count
              writer.Commit();
              ReplaceSearcher();
          }
      }

      /// <summary>
      /// Builds the document with the built-in fields: "__key", "__fileName",
      /// "__rfileName", one "__directory" per containing directory, "__modified",
      /// "__level" and, when "Content-Length" parses as an integer, "__size" and
      /// "__size_numeric".
      /// </summary>
      private static Document CreateDocument(string lowerKey, RavenJObject metadata)
      {
          var doc = new Document();
          doc.Add(new Field("__key", lowerKey, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

          var fileName = Path.GetFileName(lowerKey);
          Debug.Assert(fileName != null);
          doc.Add(new Field("__fileName", fileName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

          // the reversed version of the file name is used to allow searches that start with wildcards
          char[] revFileName = fileName.ToCharArray();
          Array.Reverse(revFileName);
          doc.Add(new Field("__rfileName", new string(revFileName), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

          // Walk from the containing directory towards the root, indexing each
          // step; `level` ends up as the number of "__directory" values added.
          int level = 0;
          var directoryName = Path.GetDirectoryName(lowerKey);
          do
          {
              level += 1;
              directoryName = (string.IsNullOrEmpty(directoryName) ? "" : directoryName.Replace("\\", "/"));
              if (directoryName.StartsWith("/", StringComparison.Ordinal) == false)
              {
                  directoryName = "/" + directoryName;
              }
              doc.Add(new Field("__directory", directoryName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
              directoryName = Path.GetDirectoryName(directoryName);
          } while (directoryName != null);

          doc.Add(new Field("__modified", DateTime.UtcNow.ToString(DateIndexFormat, CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
          doc.Add(new Field("__level", level.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));

          RavenJToken contentLen;
          if (metadata.TryGetValue("Content-Length", out contentLen))
          {
              long len;
              // Index terms must not depend on the machine's culture, so parse
              // and format with the invariant culture like the fields above.
              if (long.TryParse(contentLen.Value<string>(), NumberStyles.Integer, CultureInfo.InvariantCulture, out len))
              {
                  // Zero-padded to a fixed width of 20 digits.
                  doc.Add(new Field("__size", len.ToString("D20", CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
                  doc.Add(new NumericField("__size_numeric", Field.Store.NO, true).SetLongValue(len));
              }
          }

          return doc;
      }

      /// <summary>
      /// Hands out the current searcher. Dispose the returned handle when done with it.
      /// </summary>
      internal IDisposable GetSearcher(out IndexSearcher searcher)
      {
          return currentIndexSearcherHolder.GetSearcher(out searcher);
      }

      /// <summary>
      /// Drops the current searcher and closes the writer, analyzer and directory.
      /// Does not throw when Initialize was never called.
      /// </summary>
      public void Dispose()
      {
          currentIndexSearcherHolder.SetIndexSearcher(null);

          try
          {
              if (writer != null)
              {
                  writer.Dispose();
              }
          }
          finally
          {
              // Still release the analyzer and directory if closing the writer failed.
              if (analyzer != null)
              {
                  analyzer.Close();
              }
              if (directory != null)
              {
                  directory.Dispose();
              }
          }
      }

      /// <summary>
      /// Removes the document indexed under <paramref name="key"/> (lower-cased
      /// with the invariant culture), then commits and refreshes the searcher.
      /// </summary>
      public void Delete(string key)
      {
          var lowerKey = key.ToLowerInvariant();
          lock (writerLock)
          {
              writer.DeleteDocuments(new Term("__key", lowerKey));
              // NOTE(review): Optimize() on every delete is expensive; presumably it is here
              // so terms of deleted documents stop showing up in GetTermsFor - confirm
              // before removing.
              writer.Optimize();
              writer.Commit();
              ReplaceSearcher();
          }
      }

      /// <summary>
      /// Publishes a new searcher opened on the writer's current reader.
      /// </summary>
      private void ReplaceSearcher()
      {
          currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(writer.GetReader()));
      }

      /// <summary>
      /// Lazily enumerates the terms of <paramref name="field"/>, starting after
      /// <paramref name="fromValue"/> when one is given.
      /// </summary>
      /// <param name="field">Name of the field whose terms are listed.</param>
      /// <param name="fromValue">Term to start after; null or empty starts from the first term.</param>
      /// <returns>Term texts of the field, in the order the term enumerator yields them.</returns>
      public IEnumerable<string> GetTermsFor(string field, string fromValue)
      {
          IndexSearcher searcher;
          using (GetSearcher(out searcher))
          {
              var termEnum = searcher.IndexReader.Terms(new Term(field, fromValue ?? string.Empty));
              try
              {
                  if (string.IsNullOrEmpty(fromValue) == false) // need to skip this value
                  {
                      while (termEnum.Term == null || fromValue.Equals(termEnum.Term.Text))
                      {
                          if (termEnum.Next() == false)
                          {
                              yield break;
                          }
                      }
                  }

                  // Stop as soon as the enumerator moves on to a different field.
                  while (termEnum.Term == null || field.Equals(termEnum.Term.Field))
                  {
                      if (termEnum.Term != null)
                      {
                          yield return termEnum.Term.Text;
                      }

                      if (termEnum.Next() == false)
                      {
                          break;
                      }
                  }
              }
              finally
              {
                  termEnum.Dispose();
              }
          }
      }
  }
  212. }