PageRenderTime 32ms CodeModel.GetById 15ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Server/RavenFS/Search/IndexStorage.cs

https://github.com/nwendel/ravendb
C# | 237 lines | 218 code | 16 blank | 3 comment | 11 complexity | cda9dc2def939b16fc0830658abdc194 MD5 | raw file
  1using System;
  2using System.Collections.Generic;
  3using System.Collections.Specialized;
  4using System.Diagnostics;
  5using System.Globalization;
  6using System.IO;
  7using System.Linq;
  8using Lucene.Net.Documents;
  9using Lucene.Net.Index;
 10using Lucene.Net.Search;
 11using Lucene.Net.Store;
 12using Raven.Database.Indexing;
 13using Raven.Json.Linq;
 14using Raven.Database.Server.RavenFS.Extensions;
 15using Lucene.Net.QueryParsers;
 16
 17namespace Raven.Database.Server.RavenFS.Search
 18{
 19	public class IndexStorage : IDisposable
 20	{
 21		private const string DateIndexFormat = "yyyy-MM-dd_HH-mm-ss";
 22		private static readonly string[] NumericIndexFields = new[] { "__size_numeric" };
 23
 24		private readonly string path;
 25		private FSDirectory directory;
 26		private LowerCaseKeywordAnalyzer analyzer;
 27		private IndexWriter writer;
 28		private readonly object writerLock = new object();
 29		private readonly IndexSearcherHolder currentIndexSearcherHolder = new IndexSearcherHolder();
 30
 31		public IndexStorage(string path, NameValueCollection _)
 32		{
 33			this.path = path;
 34		}
 35
 36		public void Initialize()
 37		{
 38			if (System.IO.Directory.Exists(path) == false)
 39				System.IO.Directory.CreateDirectory(path);
 40			directory = FSDirectory.Open(new DirectoryInfo(path));
 41			if (IndexWriter.IsLocked(directory))
 42				IndexWriter.Unlock(directory);
 43
 44			analyzer = new LowerCaseKeywordAnalyzer();
 45			writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
 46			writer.SetMergeScheduler(new ErrorLoggingConcurrentMergeScheduler());
 47			currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(directory, true));
 48		}
 49
 50		public string[] Query(string query, string[] sortFields, int start, int pageSize, out int totalResults)
 51		{
 52			IndexSearcher searcher;
 53			using (GetSearcher(out searcher))
 54			{
 55				Query q;
 56				if (string.IsNullOrEmpty(query))
 57				{
 58					q = new MatchAllDocsQuery();
 59				}
 60				else
 61				{
 62					var queryParser = new RavenQueryParser(analyzer, NumericIndexFields);
 63                    q = queryParser.Parse(query);
 64				}
 65
 66				var topDocs = ExecuteQuery(searcher, sortFields, q, pageSize + start);
 67
 68				var results = new List<string>();
 69
 70				for (var i = start; i < pageSize + start && i < topDocs.TotalHits; i++)
 71				{
 72					var document = searcher.Doc(topDocs.ScoreDocs[i].Doc);
 73					results.Add(document.Get("__key"));
 74				}
 75				totalResults = topDocs.TotalHits;
 76				return results.ToArray();
 77			}
 78		}
 79
 80		private TopDocs ExecuteQuery(IndexSearcher searcher, string[] sortFields, Query q, int size)
 81		{
 82			TopDocs topDocs;
 83			if (sortFields != null && sortFields.Length > 0)
 84			{
 85				var sort = new Sort(sortFields.Select(field =>
 86				{
 87					var desc = field.StartsWith("-");
 88					if (desc)
 89						field = field.Substring(1);
 90					return new SortField(field, SortField.STRING, desc);
 91				}).ToArray());
 92				topDocs = searcher.Search(q, null, size, sort);
 93			}
 94			else
 95			{
 96				topDocs = searcher.Search(q, null, size);
 97			}
 98			return topDocs;
 99		}
100
101        public virtual void Index(string key, RavenJObject metadata)
102        {
103            lock (writerLock)
104            {
105                var lowerKey = key.ToLowerInvariant();
106                var doc = CreateDocument(lowerKey, metadata);
107
108                // REVIEW: Check if there is more straight-forward/efficient pattern out there to work with RavenJObjects.
109                var lookup = metadata.ToLookup(x => x.Key);
110                foreach ( var metadataKey in lookup )
111                {
112                    foreach ( var metadataHolder in metadataKey )
113                    {                        
114                        doc.Add(new Field(metadataHolder.Key, metadataHolder.Value.ToString(), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
115                    }
116                }
117
118                writer.DeleteDocuments(new Term("__key", lowerKey));
119                writer.AddDocument(doc);
120                // yes, this is slow, but we aren't expecting high writes count
121                writer.Commit();
122                ReplaceSearcher();
123            }
124        }
125
126        private static Document CreateDocument(string lowerKey, RavenJObject metadata)
127        {
128            var doc = new Document();
129            doc.Add(new Field("__key", lowerKey, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
130
131            var fileName = Path.GetFileName(lowerKey);
132            Debug.Assert(fileName != null);
133            doc.Add(new Field("__fileName", fileName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
134            // the reversed version of the file name is used to allow searches that start with wildcards
135            char[] revFileName = fileName.ToCharArray();
136            Array.Reverse(revFileName);
137            doc.Add(new Field("__rfileName", new string(revFileName), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
138
139            int level = 0;
140            var directoryName = Path.GetDirectoryName(lowerKey);
141            do
142            {
143                level += 1;
144                directoryName = (string.IsNullOrEmpty(directoryName) ? "" : directoryName.Replace("\\", "/"));
145                if (directoryName.StartsWith("/") == false)
146                    directoryName = "/" + directoryName;
147                doc.Add(new Field("__directory", directoryName, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
148                directoryName = Path.GetDirectoryName(directoryName);
149            } while (directoryName != null);
150            doc.Add(new Field("__modified", DateTime.UtcNow.ToString(DateIndexFormat, CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
151            doc.Add(new Field("__level", level.ToString(CultureInfo.InvariantCulture), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
152            
153            RavenJToken contentLen;
154            if ( metadata.TryGetValue("Content-Length", out contentLen))
155            {
156                long len;
157                if (long.TryParse(contentLen.Value<string>(), out len))
158                {
159                    doc.Add(new Field("__size", len.ToString("D20"), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
160                    doc.Add(new NumericField("__size_numeric", Field.Store.NO, true).SetLongValue(len));
161                }
162            }
163
164            return doc;
165        }
166
167		internal IDisposable GetSearcher(out IndexSearcher searcher)
168		{
169			return currentIndexSearcherHolder.GetSearcher(out searcher);
170		}
171
172		public void Dispose()
173		{
174			analyzer.Close();
175			if (currentIndexSearcherHolder != null)
176			{
177				currentIndexSearcherHolder.SetIndexSearcher(null);
178			}
179			writer.Dispose();
180			directory.Dispose();
181		}
182
183		public void Delete(string key)
184		{
185			var lowerKey = key.ToLowerInvariant();
186
187			lock (writerLock)
188			{
189				writer.DeleteDocuments(new Term("__key", lowerKey));
190				writer.Optimize();
191				writer.Commit();
192				ReplaceSearcher();
193			}
194		}
195
196		private void ReplaceSearcher()
197		{
198			currentIndexSearcherHolder.SetIndexSearcher(new IndexSearcher(writer.GetReader()));
199		}
200
201		public IEnumerable<string> GetTermsFor(string field, string fromValue)
202		{
203			IndexSearcher searcher;
204			using (GetSearcher(out searcher))
205			{
206				var termEnum = searcher.IndexReader.Terms(new Term(field, fromValue ?? string.Empty));
207				try
208				{
209					if (string.IsNullOrEmpty(fromValue) == false) // need to skip this value
210					{
211						while (termEnum.Term == null || fromValue.Equals(termEnum.Term.Text))
212						{
213							if (termEnum.Next() == false)
214								yield break;
215						}
216					}
217					while (termEnum.Term == null ||
218						field.Equals(termEnum.Term.Field))
219					{
220						if (termEnum.Term != null)
221						{
222							var item = termEnum.Term.Text;
223							yield return item;
224						}
225
226						if (termEnum.Next() == false)
227							break;
228					}
229				}
230				finally
231				{
232					termEnum.Dispose();
233				}
234			}
235		}
236	}
237}