/Raven.Database/Server/Controllers/BulkInsertController.cs

https://github.com/nwendel/ravendb · C# · 205 lines · 170 code · 28 blank · 7 comment · 11 complexity · 5a7c6eb2d1945ae6bb6f111bdc1d87c8 MD5 · raw file

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Diagnostics;
  4. using System.IO;
  5. using System.IO.Compression;
  6. using System.Linq;
  7. using System.Net;
  8. using System.Net.Http;
  9. using System.Threading;
  10. using System.Threading.Tasks;
  11. using System.Web;
  12. using System.Web.Http;
  13. using Raven.Abstractions;
  14. using Raven.Abstractions.Data;
  15. using Raven.Database.Actions;
  16. using Raven.Database.Extensions;
  17. using Raven.Database.Indexing;
  18. using Raven.Database.Server.Security;
  19. using Raven.Database.Util.Streams;
  20. using Raven.Imports.Newtonsoft.Json.Bson;
  21. using Raven.Json.Linq;
  22. namespace Raven.Database.Server.Controllers
  23. {
  24. [RoutePrefix("")]
  25. public class BulkInsertController : RavenDbApiController
  26. {
  /// <summary>
  /// Entry point for bulk insert requests.
  /// A non-empty "no-op" query string value short-circuits with an empty response, and
  /// "op=generate-single-use-auth-token" (compared case-insensitively) returns a single use token.
  /// Otherwise the request body is streamed into <c>Documents.BulkInsert</c> on a separate task,
  /// the task is registered with <c>Database.Tasks</c>, and the request blocks until it finishes.
  /// </summary>
  /// <returns>
  /// An empty message, an object carrying <c>Token</c>, or an object carrying <c>OperationId</c>
  /// (the id handed back by <c>Database.Tasks.AddTask</c>), depending on the branch taken.
  /// </returns>
  /// <exception cref="TimeoutException">
  /// The bulk insert task failed after marking the status as timed out.
  /// </exception>
  [HttpPost]
  [Route("bulkInsert")]
  [Route("databases/{databaseName}/bulkInsert")]
  public async Task<HttpResponseMessage> BulkInsertPost()
  {
      if (string.IsNullOrEmpty(GetQueryStringValue("no-op")) == false)
      {
          // A no-op request exists only to force the client HTTP layer to go through authentication.
          // It is used by legacy clients only.
          return GetEmptyMessage();
      }

      if ("generate-single-use-auth-token".Equals(GetQueryStringValue("op"), StringComparison.InvariantCultureIgnoreCase))
      {
          // Windows auth with anonymous access = none sometimes generates a 401 even though two requests were made.
          // Instead of relying on windows auth, which requires request buffering, a one time token is generated and returned.
          // The caller is known to have write access to this db, since the request got this far, so handing out
          // a single use token is not an issue.
          var authorizer = (MixedModeRequestAuthorizer)Configuration.Properties[typeof(MixedModeRequestAuthorizer)];
          var token = authorizer.GenerateSingleUseAuthToken(DatabaseName, User);
          return GetMessageWithObject(new
          {
              Token = token
          });
      }

      if (HttpContext.Current != null)
          HttpContext.Current.Server.ScriptTimeout = 60 * 60 * 6; // six hours, expressed in seconds

      var options = new BulkInsertOptions
      {
          OverwriteExisting = GetOverwriteExisting(),
          CheckReferencesInIndexes = GetCheckReferencesInIndexes()
      };

      var operationId = ExtractOperationId();
      var sp = Stopwatch.StartNew();

      var status = new BulkInsertStatus();
      status.IsTimedOut = false;

      var documents = 0;
      var mre = new ManualResetEventSlim(false);
      var tre = new CancellationTokenSource();

      var inputStream = await InnerRequest.Content.ReadAsStreamAsync().ConfigureAwait(false);
      var currentDatabase = Database;
      var timeout = tre.TimeoutAfter(currentDatabase.Configuration.BulkImportBatchTimeout);

      var task = Task.Factory.StartNew(() =>
      {
          try
          {
              currentDatabase.Documents.BulkInsert(options, YieldBatches(timeout, inputStream, mre, batchSize => documents += batchSize), operationId, tre.Token);
          }
          catch (OperationCanceledException)
          {
              // happens on timeout
              currentDatabase.Notifications.RaiseNotifications(new BulkInsertChangeNotification { OperationId = operationId, Message = "Operation cancelled, likely because of a batch timeout", Type = DocumentChangeTypes.BulkInsertError });
              status.Completed = true;
              status.IsTimedOut = true;
              throw;
          }
          status.Completed = true;
          status.Documents = documents;
      });

      long id;
      Database.Tasks.AddTask(task, status, new TaskActions.PendingTaskDescription
      {
          StartTime = SystemTime.UtcNow,
          TaskType = TaskActions.PendingTaskType.BulkInsert,
          Payload = operationId.ToString()
      }, out id, tre);

      try
      {
          task.Wait(Database.WorkContext.CancellationToken);
      }
      catch (AggregateException e)
      {
          // Wait() surfaces a failure of the task wrapped in an AggregateException, so the timeout flag
          // has to be inspected here: a check placed after Wait() is never reached once the task has failed.
          if (status.IsTimedOut == false)
              throw;

          throw new TimeoutException("Bulk insert operation did not receive new data longer than configured treshold", e);
      }

      sp.Stop();

      AddRequestTraceInfo(log => log.AppendFormat("\tBulk inserted received {0:#,#;;0} documents in {1}, task #: {2}", documents, sp.Elapsed, id));

      return GetMessageWithObject(new
      {
          OperationId = id
      });
  }
  /// <summary>
  /// Reads the "operationId" query string value and parses it as a <see cref="Guid"/>.
  /// </summary>
  /// <returns>The parsed id, or <see cref="Guid.Empty"/> when the value cannot be parsed.</returns>
  private Guid ExtractOperationId()
  {
      var rawOperationId = GetQueryStringValue("operationId");

      Guid operationId;
      return Guid.TryParse(rawOperationId, out operationId)
          ? operationId
          : Guid.Empty;
  }
  /// <summary>
  /// Lazily splits the request stream into batches. Each batch is preceded by a 32-bit length,
  /// and is exposed as a lazy sequence of documents read from a <c>PartialStream</c> created with that length.
  /// The sequence ends when the next length can no longer be read from the stream.
  /// </summary>
  /// <param name="timeout">Checked for cancellation before every batch and passed on to the per-batch reader.</param>
  /// <param name="inputStream">The request body; disposed and closed when enumeration finishes.</param>
  /// <param name="mre">Signalled when enumeration finishes, whether normally or because of an exception.</param>
  /// <param name="increaseDocumentsCount">Callback passed on to the per-batch reader.</param>
  /// <returns>One inner sequence per batch found in <paramref name="inputStream"/>.</returns>
  private IEnumerable<IEnumerable<JsonDocument>> YieldBatches(CancellationTimeout timeout, Stream inputStream, ManualResetEventSlim mre, Action<int> increaseDocumentsCount)
  {
      try
      {
          using (inputStream)
          {
              var lengthReader = new BinaryReader(inputStream);
              for (; ; )
              {
                  timeout.ThrowIfCancellationRequested();

                  int batchLength;
                  if (TryReadBatchLength(lengthReader, out batchLength) == false)
                      yield break;

                  // The partial stream is disposed as soon as the consumer asks for the next batch,
                  // so a batch has to be consumed before moving on.
                  using (var batchStream = new PartialStream(inputStream, batchLength))
                  {
                      yield return YieldDocumentsInBatch(timeout, batchStream, increaseDocumentsCount);
                  }
              }
          }
      }
      finally
      {
          mre.Set();
          inputStream.Close();
      }
  }

  /// <summary>
  /// Reads the 32-bit length that precedes a batch.
  /// </summary>
  /// <returns><c>false</c> when the stream ended before a full length could be read.</returns>
  private static bool TryReadBatchLength(BinaryReader reader, out int length)
  {
      try
      {
          length = reader.ReadInt32();
          return true;
      }
      catch (EndOfStreamException)
      {
          length = 0;
          return false;
      }
  }
  /// <summary>
  /// Lazily reads the documents of a single batch: a gzip-compressed payload that starts with a
  /// 32-bit document count, followed by that many BSON documents.
  /// </summary>
  /// <param name="timeout"><c>Delay()</c> is called on it before each document is read.</param>
  /// <param name="partialStream">The compressed batch; left open when the gzip wrapper is disposed.</param>
  /// <param name="increaseDocumentsCount">Invoked once with the batch's document count, after the last document has been yielded.</param>
  /// <returns>The documents of the batch, with "@metadata" moved out of the body into <c>Metadata</c>.</returns>
  /// <exception cref="InvalidOperationException">A document has no "@metadata" object, or the metadata has no "@id".</exception>
  private IEnumerable<JsonDocument> YieldDocumentsInBatch(CancellationTimeout timeout, Stream partialStream, Action<int> increaseDocumentsCount)
  {
      using (var gzip = new GZipStream(partialStream, CompressionMode.Decompress, leaveOpen: true))
      {
          var binaryReader = new BinaryReader(gzip);
          var documentCount = binaryReader.ReadInt32();

          for (var remaining = documentCount; remaining > 0; remaining--)
          {
              timeout.Delay();

              var bsonReader = new BsonReader(binaryReader)
              {
                  DateTimeKindHandling = DateTimeKind.Unspecified
              };
              var document = (RavenJObject)RavenJToken.ReadFrom(bsonReader);

              var documentMetadata = document.Value<RavenJObject>("@metadata");
              if (documentMetadata == null)
                  throw new InvalidOperationException("Could not find metadata for document");

              var key = documentMetadata.Value<string>("@id");
              if (string.IsNullOrEmpty(key))
                  throw new InvalidOperationException("Could not get id from metadata");

              // The metadata travels separately, so it is stripped from the document body.
              document.Remove("@metadata");

              yield return new JsonDocument
              {
                  Key = key,
                  DataAsJson = document,
                  Metadata = documentMetadata
              };
          }

          // Reported only once the whole batch has been enumerated.
          increaseDocumentsCount(documentCount);
      }
  }
  /// <summary>
  /// Mutable progress record for a single bulk insert, registered alongside its task.
  /// </summary>
  public class BulkInsertStatus
  {
      /// <summary>Number of documents received; assigned when the insert finishes successfully.</summary>
      public int Documents { get; set; }

      /// <summary>Set once the insert has finished, either successfully or because it was cancelled.</summary>
      public bool Completed { get; set; }

      /// <summary>Set when the insert was cancelled, which the controller treats as a batch timeout.</summary>
      public bool IsTimedOut { get; set; }
  }
  176. }
  177. }