PageRenderTime 62ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/v0.5.1/src/Savant/Indexing/DomainCrawler.cs

#
C# | 248 lines | 191 code | 31 blank | 26 comment | 24 complexity | a94369c7e65b227fe14677f076c7bf3b MD5 | raw file
  1. /*
  2. * Copyright 2009 Coditate Software
  3. * Licensed under the GNU Library General Public License (LGPL) 2.1
  4. *
  5. * License available at: http://simplesavant.codeplex.com/license
  6. */
  7. using System;
  8. using System.Collections;
  9. using System.Collections.Generic;
  10. using System.Linq;
  11. using Coditate.Common.Util;
  12. using Coditate.Savant.Consistency;
  13. using Coditate.Savant.Core;
  14. using Coditate.Savant.Data;
  15. using Common.Logging;
  16. namespace Coditate.Savant.Indexing
  17. {
  18. /// <summary>
  19. /// Crawls updated items in a single domain and feeds their indexed attributes
  20. /// to the indexer.
  21. /// </summary>
  22. /// <remarks>
  23. /// <see cref="IndexBuilder"/> instantiates a single instance of this class for each domain registered for indexing.
  24. /// </remarks>
  25. internal class DomainCrawler
  26. {
  /// <summary>Logger for this class.</summary>
  private static readonly ILog Log = LogManager.GetCurrentClassLogger();

  /// <summary>Monitor taken by <see cref="Crawl"/> so that crawls of this domain never overlap.</summary>
  private readonly object crawlLock = new object();

  /// <summary>Mapping of the version property, resolved from the item mapping in the constructor.</summary>
  private readonly AttributeMapping versionMapping;

  /// <summary>Cached select command over the crawled domain; null until first needed.</summary>
  private SelectCommand domainSelect;

  /// <summary>Cached select command that loads the persisted index state; null until first needed.</summary>
  private SelectCommand indexStateSelect;

  /// <summary>Index state recorded when the most recent crawl finished its final batch; null until then.</summary>
  private IndexState lastIndexState;

  /// <summary>
  /// Creates a crawler for the domain described by <paramref name="mapping"/>.
  /// </summary>
  /// <param name="savant">Used to select from the crawled domain and to read and write the index state.</param>
  /// <param name="config">Supplies the indexer and the formatter used while crawling.</param>
  /// <param name="mapping">Item mapping of the domain to crawl.</param>
  /// <exception cref="InvalidOperationException">
  /// The mapping has no DateTime version property or no indexed properties.
  /// </exception>
  public DomainCrawler(ISimpleSavant2 savant, SavantConfig config, ItemMapping mapping)
  {
      // Arguments are checked in declaration order before any state is touched.
      Arg.CheckNull("savant", savant);
      Arg.CheckNull("config", config);
      Arg.CheckNull("mapping", mapping);

      Mapping = mapping;
      Config = config;
      Savant = savant;
      IndexBatchSize = IndexBuilder.DefaultIndexBatchSize;

      // The validation below reads both Mapping and versionMapping, so it must run last.
      versionMapping = VersioningUtils.GetVersionMapping(mapping);
      CheckMappingValid();
  }

  /// <summary>Configuration passed to the constructor.</summary>
  public SavantConfig Config { get; private set; }

  /// <summary>Item mapping of the crawled domain.</summary>
  public ItemMapping Mapping { get; private set; }

  /// <summary>Savant instance used for all selects and puts issued by this crawler.</summary>
  public ISimpleSavant2 Savant { get; private set; }

  /// <summary>
  /// Value used for the <c>limit</c> clause of the domain select query.
  /// Initialized to <c>IndexBuilder.DefaultIndexBatchSize</c>.
  /// </summary>
  public int IndexBatchSize { get; set; }
  /// <summary>
  /// Verifies that the mapping can be crawled and indexed: it needs a version
  /// property whose format type is <see cref="DateTime"/> and at least one
  /// indexed attribute.
  /// </summary>
  /// <exception cref="InvalidOperationException">Either requirement is not met.</exception>
  private void CheckMappingValid()
  {
      bool hasDateTimeVersion = versionMapping != null && versionMapping.FormatType == typeof (DateTime);
      if (!hasDateTimeVersion)
      {
          throw new InvalidOperationException(
              string.Format(
                  "Mapping has missing or invalid version property. To support full-text indexing object mappings must include at least one DateTime property marked with {0}.",
                  typeof (VersionAttribute).Name));
      }

      bool hasIndexedAttribute = Mapping.AttributeMappings.Any(a => a.IsIndexed);
      if (!hasIndexedAttribute)
      {
          throw new InvalidOperationException(
              string.Format(
                  "Mapping has no indexed properties. To support full-text indexing object mappings must include at least one String property marked with {0}.",
                  typeof (IndexAttribute).Name));
      }
  }
  /// <summary>
  /// Crawls the domain once. Invoked by ThreadPool workers.
  /// </summary>
  /// <param name="state">Not used by the crawler.</param>
  public void Crawl(object state)
  {
      // Crawls are serialized: when a crawl runs longer than
      // IndexBuilder.UpdateInterval, the next worker waits here
      // until the running crawl has released the lock.
      lock (crawlLock)
      {
          // Consistent reads are used for the whole indexing operation.
          using (var consistentReads = new ConsistentReadScope())
          {
              CrawlImpl();
          }
      }
  }
  /// <summary>
  /// Loads the persisted index state, then selects and indexes the domain one
  /// result page at a time, persisting the index state after every page.
  /// </summary>
  private void CrawlImpl()
  {
      IndexState state = GetIndexState();
      SelectCommand select = GetDomainSelect(state.LastIndexedVersion);

      bool morePages = true;
      while (morePages)
      {
          SelectResults<PropertyValues> page = Savant.SelectAttributes(select);

          // Hand the token back to the command so the next select continues
          // where this page ended; a null token marks the final page.
          select.PaginationToken = page.PaginationToken;
          morePages = page.PaginationToken != null;

          IndexValues(page.Items);

          // Persist progress after each page; the last item of the page (if any)
          // carries the newest version seen so far.
          UpdateIndexState(state, page.Items.LastOrDefault(), !morePages);
      }
  }
  /// <summary>
  /// Advances <paramref name="indexState"/> to the version of the last indexed
  /// item and stores it through <see cref="Savant"/>.
  /// </summary>
  /// <param name="indexState">State object of the crawl in progress; modified in place.</param>
  /// <param name="lastItem">Last item of the page just indexed, or null when the page was empty.</param>
  /// <param name="batchComplete">True when the page just indexed was the final page of this crawl.</param>
  private void UpdateIndexState(IndexState indexState, PropertyValues lastItem, bool batchComplete)
  {
      if (lastItem != null)
      {
          indexState.LastIndexedVersion = (DateTime) lastItem[versionMapping.PropertyName];
      }

      // When two consecutive crawls finish on the same version, nudge the stored version
      // forward by 1 ms so the same trailing items are not selected again on every crawl of
      // a rarely updated domain. Records could be skipped if a very large batch of items is
      // inserted with exactly the same version, but only if that insertion itself spans two
      // complete indexing cycles.
      bool endedOnSameVersionAgain = batchComplete
                                     && lastIndexState != null
                                     && indexState.LastIndexedVersion == lastIndexState.LastIndexedVersion;
      if (endedOnSameVersionAgain)
      {
          indexState.LastIndexedVersion = indexState.LastIndexedVersion + TimeSpan.FromMilliseconds(1);
      }

      Savant.PutAttributes(ItemMapping.Create(typeof (IndexState)), PropertyValues.CreateValues(indexState));

      // Only a finished crawl becomes the reference point for the comparison above.
      if (batchComplete)
      {
          lastIndexState = indexState;
      }
  }
  /// <summary>
  /// Converts the selected items to index values and passes them to the configured indexer.
  /// </summary>
  /// <param name="allValues">Items returned by one select against the crawled domain.</param>
  private void IndexValues(List<PropertyValues> allValues)
  {
      List<IndexValues> converted = BuildIndexValues(allValues);

      Log.Debug(format => format(
          "Found {0} new/updated item(s) to index in domain '{1}'",
          allValues.Count,
          Mapping.DomainName));

      Config.Indexer.IndexItems(Mapping.DomainName, converted);
  }
  /// <summary>
  /// Loads the stored index state for this domain and machine.
  /// </summary>
  /// <returns>
  /// The stored state when one is found; otherwise a new state for this domain whose
  /// <c>LastIndexedVersion</c> is <see cref="DateTime.MinValue"/>.
  /// </returns>
  private IndexState GetIndexState()
  {
      PropertyValues stored = Savant.SelectAttributes(GetIndexStateSelect()).Items.FirstOrDefault();

      IndexState state = null;
      if (stored != null)
      {
          state = (IndexState) PropertyValues.CreateItem(typeof (IndexState), stored);
          Log.Debug(m => m("Found previous index state where DomainName = '{0}' and MachineGuid = '{1}': LastIndexedVersion = '{2:yyyy/MM/dd HH:mm:ss.fffK}'",
                           Mapping.DomainName, IndexState.GetMachineGuid(), state.LastIndexedVersion));
      }

      if (state != null)
      {
          return state;
      }

      // Nothing stored yet: start from the earliest possible version.
      Log.Debug(m => m("Found no previous index state where DomainName = '{0}' and MachineGuid = '{1}'",
                       Mapping.DomainName, IndexState.GetMachineGuid()));
      return new IndexState
          {
              DomainName = Mapping.DomainName,
              HostName = IndexState.GetHostName(),
              LastIndexedVersion = DateTime.MinValue,
              MachineGuid = IndexState.GetMachineGuid()
          };
  }
  /// <summary>
  /// Returns the select command that looks up this machine's index state for the
  /// crawled domain. The command is created on first use; on every call it is reset
  /// and its three parameters are given their current values.
  /// </summary>
  private SelectCommand GetIndexStateSelect()
  {
      if (indexStateSelect == null)
      {
          indexStateSelect = new SelectCommand(
              typeof (IndexState),
              "select * from SavantSystem where DataType = @DataType and MachineGuid = @MachineGuid and DomainName = @DomainName",
              new CommandParameter("DataType", null),
              new CommandParameter("MachineGuid", null),
              new CommandParameter("DomainName", null));
      }

      SelectCommand select = indexStateSelect;
      select.Reset();
      select.GetParameter("DataType").Values.Add(typeof (IndexState).Name);
      select.GetParameter("MachineGuid").Values.Add(IndexState.GetMachineGuid());
      select.GetParameter("DomainName").Values.Add(Mapping.DomainName);
      return select;
  }
  /// <summary>
  /// Value of <see cref="IndexBatchSize"/> that was formatted into the query text of the
  /// cached domain select command.
  /// </summary>
  private int domainSelectBatchSize;

  /// <summary>
  /// Returns the select command that retrieves the indexed attributes and the version
  /// attribute of every item whose version is at or after <paramref name="lastIndexTime"/>,
  /// ordered by version ascending and limited to <see cref="IndexBatchSize"/> items.
  /// </summary>
  /// <remarks>
  /// The command is cached. It is rebuilt when <see cref="IndexBatchSize"/> differs from the
  /// value the cached command was built with, so changes to the property take effect on the
  /// next call instead of being ignored.
  /// </remarks>
  /// <param name="lastIndexTime">Lower bound (inclusive) for the version of the selected items.</param>
  private SelectCommand GetDomainSelect(DateTime lastIndexTime)
  {
      // Read the property once so the query text and the remembered value cannot disagree.
      int batchSize = IndexBatchSize;

      // The limit is embedded in the query text rather than passed as a parameter, so a
      // command built for another batch size is stale.
      if (domainSelect == null || domainSelectBatchSize != batchSize)
      {
          List<string> attributeNames =
              Mapping.AttributeMappings.Where(a => a.IsIndexed).Select(a => a.AttributeName).ToList();
          // The version attribute is always selected; it is needed to advance the index state.
          attributeNames.Add(versionMapping.AttributeName);
          string attributeList = StringUtils.Join(", ", attributeNames);
          string selectQuery =
              string.Format("select {0} from {1} where {2} >= @LastIndexedVersion order by {2} asc limit {3}",
                            attributeList, Mapping.DomainName, versionMapping.AttributeName, batchSize);
          var parameter = new CommandParameter("LastIndexedVersion", versionMapping.PropertyName, null);

          // Finish building before publishing to the fields so a failure leaves the old command intact.
          var rebuilt = new SelectCommand(Mapping, selectQuery, parameter);
          rebuilt.MaxResultPages = 1;
          domainSelect = rebuilt;
          domainSelectBatchSize = batchSize;
      }

      domainSelect.Reset();
      domainSelect.GetParameter("LastIndexedVersion").Values.Add(lastIndexTime);
      return domainSelect;
  }
  /// <summary>
  /// Builds one <see cref="IndexValues"/> entry per item. Each entry is identified by the
  /// item name converted to a string and holds, for every property except the version
  /// property, all of that property's values joined with single spaces.
  /// </summary>
  /// <param name="allValues">Items to convert.</param>
  /// <returns>The converted entries, in the order of <paramref name="allValues"/>.</returns>
  internal List<IndexValues> BuildIndexValues(List<PropertyValues> allValues)
  {
      var entries = new List<IndexValues>();

      foreach (PropertyValues item in allValues)
      {
          var entry = new IndexValues(
              MappingUtils.ItemNameToString(Config.Formatter, Mapping.ItemNameMapping, item.ItemName));

          foreach (string name in item)
          {
              // The version property is not copied into the index entry.
              if (name != versionMapping.PropertyName)
              {
                  // All values of the property become one space-separated string.
                  ICollection parts = MappingUtils.ToList(item[name]);
                  entry[name] = StringUtils.Join(" ", parts);
              }
          }

          entries.Add(entry);
      }

      return entries;
  }
  216. }
  217. }