PageRenderTime 55ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/Raven.Database/Indexing/AnonymousObjectToLuceneDocumentConverter.cs

https://github.com/kairogyn/ravendb
C# | 554 lines | 472 code | 60 blank | 22 comment | 160 complexity | 0cdfc742060a300b2f314403c8cc7d2c MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, BSD-3-Clause, CC-BY-SA-3.0
  1. //-----------------------------------------------------------------------
  2. // <copyright file="AnonymousObjectToLuceneDocumentConverter.cs" company="Hibernating Rhinos LTD">
  3. // Copyright (c) Hibernating Rhinos LTD. All rights reserved.
  4. // </copyright>
  5. //-----------------------------------------------------------------------
  6. using System;
  7. using System.Collections;
  8. using System.Collections.Generic;
  9. using System.ComponentModel;
  10. using System.Globalization;
  11. using System.IO;
  12. using System.Linq;
  13. using Lucene.Net.Documents;
  14. using Raven.Abstractions.Extensions;
  15. using Raven.Database.Linq;
  16. using Raven.Imports.Newtonsoft.Json;
  17. using Raven.Imports.Newtonsoft.Json.Linq;
  18. using Raven.Abstractions;
  19. using Raven.Abstractions.Data;
  20. using Raven.Abstractions.Indexing;
  21. using Raven.Abstractions.Linq;
  22. using Raven.Database.Extensions;
  23. using Raven.Json.Linq;
  24. namespace Raven.Database.Indexing
  25. {
  26. public class AnonymousObjectToLuceneDocumentConverter
  27. {
  // Consulted by CreateFields to decide whether a field is spatial and to build spatial fields.
  private readonly AbstractViewGenerator viewGenerator;

  // Used to load attachment content; CreateRegularFields throws if this is null when an attachment is indexed.
  private readonly DocumentDatabase database;

  // Supplies the per-field index, storage, term vector and sort settings.
  private readonly IndexDefinition indexDefinition;

  // Stack of 1-based item positions for the enumerables currently being expanded;
  // a snapshot of it is part of every cache key.
  private readonly List<int> multipleItemsSameFieldCount = new List<int>();

  // Field instances that are reused (and mutated) across calls, keyed by FieldCacheKey.
  private readonly Dictionary<FieldCacheKey, Field> fieldsCache = new Dictionary<FieldCacheKey, Field>();

  // NumericField instances (the "_Range" fields) that are reused across calls.
  private readonly Dictionary<FieldCacheKey, NumericField> numericFieldsCache = new Dictionary<FieldCacheKey, NumericField>();

  /// <summary>
  /// Creates a converter bound to the given database, index definition and view generator.
  /// </summary>
  /// <param name="database">Database used to read attachments that are being indexed.</param>
  /// <param name="indexDefinition">Definition that provides the per-field indexing settings.</param>
  /// <param name="viewGenerator">View generator that knows about the spatial fields of the index.</param>
  public AnonymousObjectToLuceneDocumentConverter(DocumentDatabase database, IndexDefinition indexDefinition, AbstractViewGenerator viewGenerator)
  {
      this.viewGenerator = viewGenerator;
      this.indexDefinition = indexDefinition;
      this.database = database;
  }
  /// <summary>
  /// Lazily produces the Lucene fields for every property of <paramref name="val"/>,
  /// skipping the property whose name is <c>Constants.DocumentIdFieldName</c>.
  /// </summary>
  /// <param name="val">The object whose property values are read.</param>
  /// <param name="properties">Descriptors of the properties to index.</param>
  /// <param name="defaultStorage">Storage mode passed on to <c>CreateFields</c> for each property.</param>
  /// <returns>A deferred sequence of the fields created for all remaining properties.</returns>
  public IEnumerable<AbstractField> Index(object val, PropertyDescriptorCollection properties, Field.Store defaultStorage)
  {
      return properties
          .Cast<PropertyDescriptor>()
          .Where(descriptor => descriptor.Name != Constants.DocumentIdFieldName)
          .SelectMany(descriptor => CreateFields(descriptor.Name, descriptor.GetValue(val), defaultStorage));
  }
  /// <summary>
  /// Lazily produces the Lucene fields for every entry of <paramref name="document"/>,
  /// skipping the entry whose key is <c>Constants.DocumentIdFieldName</c>.
  /// </summary>
  /// <param name="document">The JSON object whose entries are indexed.</param>
  /// <param name="defaultStorage">Storage mode passed on to <c>CreateFields</c> for each entry.</param>
  /// <returns>A deferred sequence of the fields created for all remaining entries.</returns>
  public IEnumerable<AbstractField> Index(RavenJObject document, Field.Store defaultStorage)
  {
      return document
          .Where(entry => entry.Key != Constants.DocumentIdFieldName)
          .SelectMany(entry => CreateFields(entry.Key, GetPropertyValue(entry.Value), defaultStorage));
  }
  /// <summary>
  /// Converts a JSON token into the value handed to <c>CreateFields</c>:
  /// arrays and objects become their unformatted JSON text, every other token
  /// is unwrapped with <c>Value&lt;object&gt;()</c>.
  /// </summary>
  private static object GetPropertyValue(RavenJToken property)
  {
      var tokenType = property.Type;
      var isContainer = tokenType == JTokenType.Array || tokenType == JTokenType.Object;
      if (isContainer)
      {
          return property.ToString(Formatting.None);
      }

      return property.Value<object>();
  }
  /// <summary>
  /// Generates the Lucene fields used to index a single named value.
  /// The name is normalised first: a name that does not start with a letter or '_' gets a '_' prefix.
  /// Spatial fields (as reported by the view generator) are delegated to the spatial field itself;
  /// everything else goes through the regular conversion, which behaves as follows:
  /// * An enumerable value has all of its items indexed under the same field name.
  /// * A null value produces a single unanalyzed field holding Constants.NullValue.
  /// * A string, or a value whose field is configured as not analyzed, produces a single field with the supplied name.
  /// * A date produces a single field with the supplied name, written using the default date format.
  /// * A numeric value (int, long, double, decimal or float) produces two fields:
  ///   1. one with the supplied name holding the number as text - useful for direct queries
  ///   2. one named name + "_Range" holding the number in a form that allows range queries
  /// </summary>
  /// <exception cref="ArgumentException">The name is null, empty or consists only of whitespace.</exception>
  public IEnumerable<AbstractField> CreateFields(string name, object value, Field.Store defaultStorage, bool nestedArray = false, Field.TermVector defaultTermVector = Field.TermVector.NO, Field.Index? analyzed = null)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          throw new ArgumentException("Field must be not null, not empty and cannot contain whitespace", "name");
      }

      var firstChar = name[0];
      var hasValidStart = char.IsLetter(firstChar) || firstChar == '_';
      if (!hasValidStart)
      {
          name = "_" + name;
      }

      if (viewGenerator.IsSpatialField(name))
      {
          return viewGenerator.GetSpatialField(name).CreateIndexableFields(value);
      }

      return CreateRegularFields(name, value, defaultStorage, nestedArray, defaultTermVector, analyzed);
  }
  /// <summary>
  /// Iterator that creates the non-spatial Lucene fields for one name/value pair.
  /// The value is matched against a fixed cascade of cases (null, attachment, empty string,
  /// dynamic null, boosted value, ready-made field, byte array, enumerable, explicitly
  /// not-analyzed, string, and finally the individual scalar types). The scalar cases fall
  /// through to <c>CreateNumericFieldWithCaching</c>, which adds the "_Range" field when
  /// the value is a TimeSpan or one of the supported numeric types.
  /// </summary>
  /// <param name="name">Field name, already normalised by <c>CreateFields</c>.</param>
  /// <param name="value">The value to index; may be null.</param>
  /// <param name="defaultStorage">Storage used when the index definition has no setting for the field.</param>
  /// <param name="nestedArray">True when called for an item of an enumerable; suppresses the "_IsArray" marker field.</param>
  /// <param name="defaultTermVector">Term vector used when the index definition has no setting for the field.</param>
  /// <param name="analyzed">Explicit indexing option; when null the index definition is consulted.</param>
  /// <exception cref="InvalidOperationException">An attachment is being indexed but no database was supplied.</exception>
  private IEnumerable<AbstractField> CreateRegularFields(string name, object value, Field.Store defaultStorage, bool nestedArray = false, Field.TermVector defaultTermVector = Field.TermVector.NO, Field.Index? analyzed = null)
  {
      var fieldIndexingOptions = analyzed ?? indexDefinition.GetIndex(name, null);
      var storage = indexDefinition.GetStorage(name, defaultStorage);
      var termVector = indexDefinition.GetTermVector(name, defaultTermVector);

      // Not indexed, not stored and no term vector: there is nothing to emit.
      if (fieldIndexingOptions == Field.Index.NO && storage == Field.Store.NO && termVector == Field.TermVector.NO)
      {
          yield break;
      }

      if (fieldIndexingOptions == Field.Index.NO && storage == Field.Store.NO)
      {
          fieldIndexingOptions = Field.Index.ANALYZED; // we have some sort of term vector, forcing index to be analyzed, then.
      }

      if (value == null)
      {
          yield return CreateFieldWithCaching(name, Constants.NullValue, storage,
              Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
          yield break;
      }

      var attachmentForIndexing = value as AttachmentForIndexing;
      if (attachmentForIndexing != null)
      {
          if (database == null)
              throw new InvalidOperationException(
                  "Cannot use attachment for indexing if the database parameter is null. This is probably a RavenDB bug");

          var attachment = database.GetStatic(attachmentForIndexing.Key);
          if (attachment == null)
          {
              yield break;
          }

          var fieldWithCaching = CreateFieldWithCaching(name, string.Empty, Field.Store.NO, fieldIndexingOptions, termVector);

          if (database.TransactionalStorage.IsAlreadyInBatch)
          {
              // The reader is handed to the field as its value, so it must not be disposed here.
              var streamReader = new StreamReader(attachment.Data());
              fieldWithCaching.SetValue(streamReader);
          }
          else
          {
              // we are not in a batch operation, so we have to create one to be able to read the attachment's data
              database.TransactionalStorage.Batch(accessor =>
              {
                  // we have to read it into memory because after exiting the batch the attachment's data stream will be closed;
                  // the reader is fully consumed here, so it is disposed right away
                  using (var streamReader = new StreamReader(attachment.Data()))
                  {
                      fieldWithCaching.SetValue(streamReader.ReadToEnd());
                  }
              });
          }

          yield return fieldWithCaching;
          yield break;
      }

      if (Equals(value, string.Empty))
      {
          yield return CreateFieldWithCaching(name, Constants.EmptyString, storage,
              Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
          yield break;
      }

      var dynamicNullObject = value as DynamicNullObject;
      if (ReferenceEquals(dynamicNullObject, null) == false)
      {
          // Only an explicit null is indexed; any other dynamic null produces no fields.
          if (dynamicNullObject.IsExplicitNull)
          {
              var sortOptions = indexDefinition.GetSortOption(name);
              if (sortOptions == null || sortOptions.Value == SortOptions.None || sortOptions.Value == SortOptions.String ||
                  sortOptions.Value == SortOptions.StringVal || sortOptions.Value == SortOptions.Custom)
              {
                  yield return CreateFieldWithCaching(name, Constants.NullValue, storage,
                      Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
              }

              foreach (var field in CreateNumericFieldWithCaching(name, GetNullValueForSorting(sortOptions), storage, termVector))
                  yield return field;
          }

          yield break;
      }

      var boostedValue = value as BoostedValue;
      if (boostedValue != null)
      {
          // Index the wrapped value, then apply the boost to every field it produced.
          foreach (var field in CreateFields(name, boostedValue.Value, storage, false, termVector))
          {
              field.Boost = boostedValue.Boost;
              field.OmitNorms = false;
              yield return field;
          }
          yield break;
      }

      var abstractField = value as AbstractField;
      if (abstractField != null)
      {
          // The caller already built the field; pass it through untouched.
          yield return abstractField;
          yield break;
      }

      var bytes = value as byte[];
      if (bytes != null)
      {
          yield return CreateBinaryFieldWithCaching(name, bytes, storage, fieldIndexingOptions, termVector);
          yield break;
      }

      var itemsToIndex = value as IEnumerable;
      if (itemsToIndex != null && ShouldTreatAsEnumerable(itemsToIndex))
      {
          int count = 1;

          if (nestedArray == false)
              yield return new Field(name + "_IsArray", "true", storage, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);

          foreach (var itemToIndex in itemsToIndex)
          {
              if (!CanCreateFieldsForNestedArray(itemToIndex, fieldIndexingOptions))
                  continue;

              // The item position becomes part of the cache key, so every item gets its own field instance.
              multipleItemsSameFieldCount.Add(count++);
              try
              {
                  foreach (var field in CreateFields(name, itemToIndex, storage, nestedArray: true, defaultTermVector: defaultTermVector, analyzed: analyzed))
                      yield return field;
              }
              finally
              {
                  // Always pop, even when the item throws or the consumer stops enumerating early;
                  // a stale entry would change the cache key of every field created afterwards.
                  multipleItemsSameFieldCount.RemoveAt(multipleItemsSameFieldCount.Count - 1);
              }
          }

          yield break;
      }

      if (Equals(fieldIndexingOptions, Field.Index.NOT_ANALYZED) ||
          Equals(fieldIndexingOptions, Field.Index.NOT_ANALYZED_NO_NORMS))// explicitly not analyzed
      {
          // date time, time span and date time offset have the same structure for analyzed and not analyzed.
          if (!(value is DateTime) && !(value is DateTimeOffset) && !(value is TimeSpan))
          {
              yield return CreateFieldWithCaching(name, value.ToString(), storage,
                  indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
              yield break;
          }
      }

      if (value is string)
      {
          var index = indexDefinition.GetIndex(name, Field.Index.ANALYZED);
          yield return CreateFieldWithCaching(name, value.ToString(), storage, index, termVector);
          yield break;
      }

      if (value is TimeSpan)
      {
          var val = (TimeSpan)value;
          yield return CreateFieldWithCaching(name, val.ToString("c", CultureInfo.InvariantCulture), storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is DateTime)
      {
          var val = (DateTime)value;
          var dateAsString = val.ToString(Default.DateTimeFormatsToWrite, CultureInfo.InvariantCulture);
          if (val.Kind == DateTimeKind.Utc)
              dateAsString += "Z";
          yield return CreateFieldWithCaching(name, dateAsString, storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is DateTimeOffset)
      {
          var val = (DateTimeOffset)value;

          // Not analyzed: keep the offset in the text. Otherwise: write the UTC instant with a "Z" suffix.
          string dtoStr;
          if (Equals(fieldIndexingOptions, Field.Index.NOT_ANALYZED) || Equals(fieldIndexingOptions, Field.Index.NOT_ANALYZED_NO_NORMS))
          {
              dtoStr = val.ToString(Default.DateTimeOffsetFormatsToWrite, CultureInfo.InvariantCulture);
          }
          else
          {
              dtoStr = val.UtcDateTime.ToString(Default.DateTimeFormatsToWrite, CultureInfo.InvariantCulture) + "Z";
          }
          yield return CreateFieldWithCaching(name, dtoStr, storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is bool)
      {
          yield return new Field(name, ((bool)value) ? "true" : "false", storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is double)
      {
          var d = (double)value;
          yield return CreateFieldWithCaching(name, d.ToString("r", CultureInfo.InvariantCulture), storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is decimal)
      {
          var d = (decimal)value;
          var s = d.ToString(CultureInfo.InvariantCulture);

          // Strip trailing zeros of the fraction (and a dangling decimal point).
          if (s.Contains('.'))
          {
              s = s.TrimEnd('0');
              if (s.EndsWith("."))
                  s = s.Substring(0, s.Length - 1);
          }
          yield return CreateFieldWithCaching(name, s, storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is IConvertible) // we need this to store numbers in invariant format, so JSON could read them
      {
          var convert = ((IConvertible)value);
          yield return CreateFieldWithCaching(name, convert.ToString(CultureInfo.InvariantCulture), storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else if (value is IDynamicJsonObject)
      {
          var inner = ((IDynamicJsonObject)value).Inner;
          yield return CreateFieldWithCaching(name + "_ConvertToJson", "true", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
          yield return CreateFieldWithCaching(name, inner.ToString(Formatting.None), storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }
      else
      {
          // Anything else is serialized to JSON; objects and arrays additionally get the "_ConvertToJson" marker,
          // while a quoted JSON string has its surrounding quotes removed.
          var jsonVal = RavenJToken.FromObject(value).ToString(Formatting.None);
          if (jsonVal.StartsWith("{") || jsonVal.StartsWith("["))
              yield return CreateFieldWithCaching(name + "_ConvertToJson", "true", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO);
          else if (jsonVal.StartsWith("\"") && jsonVal.EndsWith("\"") && jsonVal.Length > 1)
              jsonVal = jsonVal.Substring(1, jsonVal.Length - 2);
          yield return CreateFieldWithCaching(name, jsonVal, storage,
              indexDefinition.GetIndex(name, Field.Index.NOT_ANALYZED_NO_NORMS), termVector);
      }

      // Adds the "_Range" field when the value is a TimeSpan or a supported numeric type.
      foreach (var numericField in CreateNumericFieldWithCaching(name, value, storage, termVector))
          yield return numericField;
  }
  /// <summary>
  /// Returns the boxed minimum value that stands in for an explicit null in the numeric
  /// range field, chosen according to the field's sort option.
  /// </summary>
  /// <param name="sortOptions">The sort option configured for the field, or null when none is configured.</param>
  /// <returns>
  /// <c>int.MinValue</c> for Short and Int, <c>double.MinValue</c> for Double,
  /// <c>float.MinValue</c> for Float, and <c>long.MinValue</c> in every other case.
  /// </returns>
  private static object GetNullValueForSorting(SortOptions? sortOptions)
  {
      switch (sortOptions)
      {
          case SortOptions.Short:
          case SortOptions.Int:
              return int.MinValue;
          case SortOptions.Double:
              return double.MinValue;
          case SortOptions.Float:
              return float.MinValue;
          // Long (to be able to sort on timestamps), String, StringVal, None, Custom
          // and a missing sort option all share the long minimum.
          default:
              return long.MinValue;
      }
  }
  /// <summary>
  /// Iterator that yields the numeric "_Range" field for <paramref name="value"/> when it is a
  /// TimeSpan, int, long, decimal, float or double; any other value yields nothing.
  /// The numeric representation follows the field's sort option when one of Int, Long, Float
  /// or Double is configured, and the value's own type otherwise. A TimeSpan is always
  /// written as its tick count.
  /// </summary>
  /// <param name="name">Field name without the "_Range" suffix; also used to look up storage and sort option.</param>
  /// <param name="value">The value to write into the numeric field.</param>
  /// <param name="defaultStorage">Storage used when the index definition has no setting for the field.</param>
  /// <param name="termVector">Only used as part of the cache key.</param>
  private IEnumerable<AbstractField> CreateNumericFieldWithCaching(string name, object value,
      Field.Store defaultStorage, Field.TermVector termVector)
  {
      var fieldName = name + "_Range";
      var storage = indexDefinition.GetStorage(name, defaultStorage);
      var cacheKey = new FieldCacheKey(name, null, storage, termVector, multipleItemsSameFieldCount.ToArray());
      NumericField numericField;
      if (numericFieldsCache.TryGetValue(cacheKey, out numericField) == false)
      {
          numericFieldsCache[cacheKey] = numericField = new NumericField(fieldName, storage, true);
      }

      // Each numeric branch looks the sort option up exactly once and dispatches on it.
      if (value is TimeSpan)
      {
          yield return numericField.SetLongValue(((TimeSpan)value).Ticks);
      }
      else if (value is int)
      {
          var number = (int)value;
          switch (indexDefinition.GetSortOption(name))
          {
              case SortOptions.Long:
                  yield return numericField.SetLongValue(number);
                  break;
              case SortOptions.Float:
                  yield return numericField.SetFloatValue(number);
                  break;
              case SortOptions.Double:
                  yield return numericField.SetDoubleValue(number);
                  break;
              default:
                  yield return numericField.SetIntValue(number);
                  break;
          }
      }
      else if (value is long)
      {
          var number = (long)value;
          switch (indexDefinition.GetSortOption(name))
          {
              case SortOptions.Double:
                  yield return numericField.SetDoubleValue(number);
                  break;
              case SortOptions.Float:
                  yield return numericField.SetFloatValue(number);
                  break;
              case SortOptions.Int:
                  // Convert.ToInt32 throws OverflowException when the value does not fit in an int.
                  yield return numericField.SetIntValue(Convert.ToInt32(number));
                  break;
              default:
                  yield return numericField.SetLongValue(number);
                  break;
          }
      }
      else if (value is decimal)
      {
          var number = (decimal)value;
          switch (indexDefinition.GetSortOption(name))
          {
              case SortOptions.Float:
                  yield return numericField.SetFloatValue(Convert.ToSingle(number));
                  break;
              case SortOptions.Int:
                  yield return numericField.SetIntValue(Convert.ToInt32(number));
                  break;
              case SortOptions.Long:
                  yield return numericField.SetLongValue(Convert.ToInt64(number));
                  break;
              default:
                  yield return numericField.SetDoubleValue((double)number);
                  break;
          }
      }
      else if (value is float)
      {
          var number = (float)value;
          switch (indexDefinition.GetSortOption(name))
          {
              case SortOptions.Double:
                  yield return numericField.SetDoubleValue(number);
                  break;
              case SortOptions.Int:
                  yield return numericField.SetIntValue(Convert.ToInt32(number));
                  break;
              case SortOptions.Long:
                  yield return numericField.SetLongValue(Convert.ToInt64(number));
                  break;
              default:
                  yield return numericField.SetFloatValue(number);
                  break;
          }
      }
      else if (value is double)
      {
          var number = (double)value;
          switch (indexDefinition.GetSortOption(name))
          {
              case SortOptions.Float:
                  yield return numericField.SetFloatValue(Convert.ToSingle(number));
                  break;
              case SortOptions.Int:
                  yield return numericField.SetIntValue(Convert.ToInt32(number));
                  break;
              case SortOptions.Long:
                  yield return numericField.SetLongValue(Convert.ToInt64(number));
                  break;
              default:
                  yield return numericField.SetDoubleValue(number);
                  break;
          }
      }
  }
  /// <summary>
  /// Decides whether a value should be expanded item by item when it is indexed.
  /// </summary>
  /// <param name="itemsToIndex">The candidate value.</param>
  /// <returns>
  /// False for null, <c>DynamicJsonObject</c>, string, <c>RavenJObject</c> and any
  /// <c>IDictionary</c>; true for everything else.
  /// </returns>
  public static bool ShouldTreatAsEnumerable(object itemsToIndex)
  {
      var isExcluded = itemsToIndex == null
          || itemsToIndex is DynamicJsonObject
          || itemsToIndex is string
          || itemsToIndex is RavenJObject
          || itemsToIndex is IDictionary;

      return !isExcluded;
  }
  /// <summary>
  /// Returns the cached (or newly created and cached) field for a binary value.
  /// The bytes are serialized with the default JSON serializer and the surrounding
  /// quotes of the resulting JSON string are stripped before it is used as the field value.
  /// The returned field always has Boost = 1 and OmitNorms = true.
  /// </summary>
  /// <exception cref="ArgumentException">The value is longer than 1024 bytes.</exception>
  private Field CreateBinaryFieldWithCaching(string name, byte[] value, Field.Store store, Field.Index index, Field.TermVector termVector)
  {
      if (value.Length > 1024)
      {
          throw new ArgumentException("Binary values must be smaller than 1Kb");
      }

      // Note: the index option is deliberately not part of the key here (null is passed).
      var key = new FieldCacheKey(name, null, store, termVector, multipleItemsSameFieldCount.ToArray());

      var jsonWriter = new StringWriter();
      JsonExtensions.CreateDefaultJsonSerializer().Serialize(jsonWriter, value);
      var quoted = jsonWriter.ToString();
      // drop the leading and trailing '"' of the serialized JSON string
      var encoded = quoted.Substring(1, quoted.Length - 2);

      Field cached;
      if (!fieldsCache.TryGetValue(key, out cached))
      {
          cached = new Field(name, encoded, store, index, termVector);
          fieldsCache[key] = cached;
      }

      // A cached instance is reused, so its mutable state is reset on every call.
      cached.SetValue(encoded);
      cached.Boost = 1;
      cached.OmitNorms = true;
      return cached;
  }
  /// <summary>
  /// Key of the field caches: combines the field name, its index / store / term vector
  /// options and the positions of the enumerable items currently being expanded.
  /// Two keys are equal when all five parts are equal (the positions are compared element by element).
  /// </summary>
  public class FieldCacheKey
  {
      // Shared stand-in for a missing position array, so Equals and GetHashCode never see null.
      private static readonly int[] NoItems = new int[0];

      private readonly string name;
      private readonly Field.Index? index;
      private readonly Field.Store store;
      private readonly Field.TermVector termVector;
      private readonly int[] multipleItemsSameField;

      /// <summary>
      /// Creates a key from its parts.
      /// </summary>
      /// <param name="name">Field name.</param>
      /// <param name="index">Index option, or null when it is not part of the key.</param>
      /// <param name="store">Store option.</param>
      /// <param name="termVector">Term vector option.</param>
      /// <param name="multipleItemsSameField">Item positions; null is treated as an empty array.</param>
      public FieldCacheKey(string name, Field.Index? index, Field.Store store, Field.TermVector termVector, int[] multipleItemsSameField)
      {
          this.name = name;
          this.index = index;
          this.store = store;
          this.termVector = termVector;
          this.multipleItemsSameField = multipleItemsSameField ?? NoItems;
      }

      /// <summary>
      /// Compares all parts of the two keys.
      /// </summary>
      protected bool Equals(FieldCacheKey other)
      {
          return string.Equals(name, other.name) &&
                 Equals(index, other.index) &&
                 Equals(store, other.store) &&
                 Equals(termVector, other.termVector) &&
                 multipleItemsSameField.SequenceEqual(other.multipleItemsSameField);
      }

      /// <summary>
      /// Only another instance of exactly this type can be equal to this key.
      /// </summary>
      public override bool Equals(object obj)
      {
          if (ReferenceEquals(null, obj)) return false;
          if (ReferenceEquals(this, obj)) return true;
          if (obj.GetType() != typeof(FieldCacheKey)) return false;
          return Equals((FieldCacheKey)obj);
      }

      /// <summary>
      /// Combines the hash codes of all parts, folding in every item position.
      /// </summary>
      public override int GetHashCode()
      {
          unchecked
          {
              int hashCode = (name != null ? name.GetHashCode() : 0);
              hashCode = (hashCode * 397) ^ (index != null ? index.GetHashCode() : 0);
              hashCode = (hashCode * 397) ^ store.GetHashCode();
              hashCode = (hashCode * 397) ^ termVector.GetHashCode();
              foreach (var position in multipleItemsSameField)
              {
                  hashCode = (hashCode * 397) ^ position;
              }
              return hashCode;
          }
      }
  }
  /// <summary>
  /// Returns the cached field for the given name and options, creating and caching it on
  /// first use. The cache key also includes the current enumerable item positions.
  /// The field's value is set to <paramref name="value"/> and its Boost and OmitNorms
  /// are reset to 1 and true on every call.
  /// </summary>
  private Field CreateFieldWithCaching(string name, string value, Field.Store store, Field.Index index, Field.TermVector termVector)
  {
      var key = new FieldCacheKey(name, index, store, termVector, multipleItemsSameFieldCount.ToArray());

      Field cached;
      if (!fieldsCache.TryGetValue(key, out cached))
      {
          cached = new Field(name, value, store, index, termVector);
          fieldsCache[key] = cached;
      }

      // A cached instance is reused, so its mutable state is reset on every call.
      cached.SetValue(value);
      cached.Boost = 1;
      cached.OmitNorms = true;
      return cached;
  }
  /// <summary>
  /// Tells whether an item of an enumerable should get fields of its own.
  /// For a field that is not analyzed every item qualifies; for an analyzed field
  /// null items and <c>DynamicNullObject</c> items are skipped.
  /// </summary>
  private bool CanCreateFieldsForNestedArray(object value, Field.Index fieldIndexingOptions)
  {
      return !fieldIndexingOptions.IsAnalyzed()
          || (value != null && !(value is DynamicNullObject));
  }
  493. }
  494. }