PageRenderTime 42ms CodeModel.GetById 18ms app.highlight 18ms RepoModel.GetById 1ms app.codeStats 1ms

/Rhino.Etl.Core/Operations/SqlBulkInsertOperation.cs

http://github.com/ayende/rhino-etl
C# | 387 lines | 274 code | 40 blank | 73 comment | 24 complexity | dd7dce5c7a82fabf831468d6fd0f549c MD5 | raw file
  1using System.Configuration;
  2using Rhino.Etl.Core.Infrastructure;
  3
  4namespace Rhino.Etl.Core.Operations
  5{
  6    using System;
  7    using System.Linq;
  8    using System.Collections.Generic;
  9    using System.Data;
 10    using System.Data.SqlClient;
 11    using DataReaders;
 12
 13    /// <summary>
 14    /// Allows to execute an operation that perform a bulk insert into a sql server database
 15    /// </summary>
 16    public abstract class SqlBulkInsertOperation : AbstractDatabaseOperation
 17    {
 18        /// <summary>
 19        /// The schema of the destination table
 20        /// </summary>
 21        private IDictionary<string, Type> _schema = new Dictionary<string, Type>();
 22
 23        /// <summary>
 24        /// The mapping of columns from the row to the database schema.
 25        /// Important: The column name in the database is case sensitive!
 26        /// </summary>
 27        public IDictionary<string, string> Mappings = new Dictionary<string, string>();
 28        private readonly IDictionary<string, Type> _inputSchema = new Dictionary<string, Type>();
 29
 30        private SqlBulkCopy sqlBulkCopy;
 31        private string targetTable;
 32        private int timeout;
 33        private int batchSize;
 34        private    int    notifyBatchSize;
 35        private SqlBulkCopyOptions bulkCopyOptions = SqlBulkCopyOptions.Default;
 36        
 37
 38        /// <summary>
 39        /// Initializes a new instance of the <see cref="SqlBulkInsertOperation"/> class.
 40        /// </summary>
 41        /// <param name="connectionStringName">Name of the connection string.</param>
 42        /// <param name="targetTable">The target table.</param>
 43        protected SqlBulkInsertOperation(string connectionStringName, string targetTable)
 44            : this(ConfigurationManager.ConnectionStrings[connectionStringName], targetTable)
 45        {
 46
 47        }
 48
 49        /// <summary>
 50        /// Initializes a new instance of the <see cref="SqlBulkInsertOperation"/> class.
 51        /// </summary>
 52        /// <param name="connectionStringSettings">Connection string settings to use.</param>
 53        /// <param name="targetTable">The target table.</param>
 54        protected SqlBulkInsertOperation(ConnectionStringSettings connectionStringSettings, string targetTable)
 55            : this(connectionStringSettings, targetTable, 600)
 56        {
 57
 58        }
 59
 60        /// <summary>
 61        /// Initializes a new instance of the <see cref="SqlBulkInsertOperation"/> class.
 62        /// </summary>
 63        /// <param name="connectionStringName">Name of the connection string.</param>
 64        /// <param name="targetTable">The target table.</param>
 65        /// <param name="timeout">The timeout.</param>
 66        protected SqlBulkInsertOperation(string connectionStringName, string targetTable, int timeout)
 67            : this(ConfigurationManager.ConnectionStrings[connectionStringName], targetTable, timeout)
 68        {
 69            Guard.Against(string.IsNullOrEmpty(targetTable), "TargetTable was not set, but it is mandatory");
 70            this.targetTable = targetTable;
 71            this.timeout = timeout;
 72        }
 73
 74        /// <summary>
 75        /// Initializes a new instance of the <see cref="SqlBulkInsertOperation"/> class.
 76        /// </summary>
 77        /// <param name="connectionStringSettings">Connection string settings to use.</param>
 78        /// <param name="targetTable">The target table.</param>
 79        /// <param name="timeout">The timeout.</param>
 80        protected SqlBulkInsertOperation(ConnectionStringSettings connectionStringSettings, string targetTable, int timeout)
 81            : base(connectionStringSettings)
 82        {
 83            Guard.Against(string.IsNullOrEmpty(targetTable), "TargetTable was not set, but it is mandatory");
 84            this.targetTable = targetTable;
 85            this.timeout = timeout;
 86        }
 87
 88        /// <summary>The timeout value of the bulk insert operation</summary>
 89        public virtual int Timeout
 90        {
 91            get { return timeout; }
 92            set { timeout = value; }
 93        }
 94
 95        /// <summary>The batch size value of the bulk insert operation</summary>
 96        public virtual int BatchSize
 97        {
 98            get { return batchSize; }
 99            set { batchSize = value; }
100        }
101
102        ///    <summary>The batch size    value of the bulk insert operation</summary>
103        public virtual int NotifyBatchSize
104        {
105            get    { return notifyBatchSize>0 ? notifyBatchSize : batchSize; }
106            set    { notifyBatchSize =    value; }
107        }
108
109        /// <summary>The table or view to bulk load the data into.</summary>
110        public string TargetTable
111        {
112            get { return targetTable; }
113            set { targetTable = value; }
114        }
115
116        /// <summary><c>true</c> to turn the <see cref="SqlBulkCopyOptions.TableLock"/> option on, otherwise <c>false</c>.</summary>
117        public virtual bool LockTable
118        {
119            get { return IsOptionOn(SqlBulkCopyOptions.TableLock); }
120            set { ToggleOption(SqlBulkCopyOptions.TableLock, value); }
121        }
122
123        /// <summary><c>true</c> to turn the <see cref="SqlBulkCopyOptions.KeepIdentity"/> option on, otherwise <c>false</c>.</summary>
124        public virtual bool KeepIdentity
125        {
126            get { return IsOptionOn(SqlBulkCopyOptions.KeepIdentity); }
127            set { ToggleOption(SqlBulkCopyOptions.KeepIdentity, value); }
128        }
129
130        /// <summary><c>true</c> to turn the <see cref="SqlBulkCopyOptions.KeepNulls"/> option on, otherwise <c>false</c>.</summary>
131        public virtual bool KeepNulls
132        {
133            get { return IsOptionOn(SqlBulkCopyOptions.KeepNulls); }
134            set { ToggleOption(SqlBulkCopyOptions.KeepNulls, value); }
135        }
136
137        /// <summary><c>true</c> to turn the <see cref="SqlBulkCopyOptions.CheckConstraints"/> option on, otherwise <c>false</c>.</summary>
138        public virtual bool CheckConstraints
139        {
140            get { return IsOptionOn(SqlBulkCopyOptions.CheckConstraints); }
141            set { ToggleOption(SqlBulkCopyOptions.CheckConstraints, value); }
142        }
143
144        /// <summary><c>true</c> to turn the <see cref="SqlBulkCopyOptions.FireTriggers"/> option on, otherwise <c>false</c>.</summary>
145        public virtual bool FireTriggers
146        {
147            get { return IsOptionOn(SqlBulkCopyOptions.FireTriggers); }
148            set { ToggleOption(SqlBulkCopyOptions.FireTriggers, value); }
149        }
150
151        /// <summary>Turns a <see cref="bulkCopyOptions"/> on or off depending on the value of <paramref name="on"/></summary>
152        /// <param name="option">The <see cref="SqlBulkCopyOptions"/> to turn on or off.</param>
153        /// <param name="on"><c>true</c> to set the <see cref="SqlBulkCopyOptions"/> <paramref name="option"/> on otherwise <c>false</c> to turn the <paramref name="option"/> off.</param>
154        protected void ToggleOption(SqlBulkCopyOptions option, bool on)
155        {
156            if (on)
157            {
158                TurnOptionOn(option);
159            }
160            else
161            {
162                TurnOptionOff(option);
163            }
164        }
165
166        /// <summary>Returns <c>true</c> if the <paramref name="option"/> is turned on, otherwise <c>false</c></summary>
167        /// <param name="option">The <see cref="SqlBulkCopyOptions"/> option to test for.</param>
168        /// <returns></returns>
169        protected bool IsOptionOn(SqlBulkCopyOptions option)
170        {
171            return (bulkCopyOptions & option) == option;
172        }
173
174        /// <summary>Turns the <paramref name="option"/> on.</summary>
175        /// <param name="option"></param>
176        protected void TurnOptionOn(SqlBulkCopyOptions option)
177        {
178            bulkCopyOptions |= option;
179        }
180
181        /// <summary>Turns the <paramref name="option"/> off.</summary>
182        /// <param name="option"></param>
183        protected void TurnOptionOff(SqlBulkCopyOptions option)
184        {
185            if (IsOptionOn(option))
186                bulkCopyOptions ^= option;
187        }
188
189        /// <summary>The table or view's schema information.</summary>
190        public IDictionary<string, Type> Schema
191        {
192            get { return _schema; }
193            set { _schema = value; }
194        }
195
196        /// <summary>
197        /// Prepares the mapping for use, by default, it uses the schema mapping.
198        /// This is the preferred appraoch
199        /// </summary>
200        public virtual void PrepareMapping()
201        {
202            foreach (KeyValuePair<string, Type> pair in _schema)
203            {
204                Mappings[pair.Key] = pair.Key;
205            }
206        }
207
208        /// <summary>Use the destination Schema and Mappings to create the
209        /// operations input schema so it can build the adapter for sending
210        /// to the WriteToServer method.</summary>
211        public virtual void CreateInputSchema()
212        {
213            foreach(KeyValuePair<string, string> pair in Mappings)
214            {
215                _inputSchema.Add(pair.Key, _schema[pair.Value]);
216            }
217        }
218
219        /// <summary>
220        /// Executes this operation
221        /// </summary>
222        public override IEnumerable<Row> Execute(IEnumerable<Row> rows)
223        {
224            Guard.Against<ArgumentException>(rows == null, "SqlBulkInsertOperation cannot accept a null enumerator");
225            PrepareSchema();
226            PrepareMapping();
227            CreateInputSchema();
228            using (SqlConnection connection = (SqlConnection)Use.Connection(ConnectionStringSettings))
229            using (SqlTransaction transaction = (SqlTransaction) BeginTransaction(connection))
230            {
231                sqlBulkCopy = CreateSqlBulkCopy(connection, transaction);
232                DictionaryEnumeratorDataReader adapter = new DictionaryEnumeratorDataReader(_inputSchema, rows);
233                try
234                {
235                    sqlBulkCopy.WriteToServer(adapter);
236                }
237                catch (InvalidOperationException)
238                {
239                    CompareSqlColumns(connection, transaction, rows);
240                    throw;
241                }
242
243                if (PipelineExecuter.HasErrors)
244                {
245                    Warn("Rolling back transaction in {0}", Name);
246                    if (transaction != null) transaction.Rollback();
247                    Warn("Rolled back transaction in {0}", Name);
248                }
249                else
250                {
251                    Debug("Committing {0}", Name);
252                    if (transaction != null) transaction.Commit();
253                    Debug("Committed {0}", Name);
254                }
255            }
256            yield break;
257        }
258
259        /// <summary>
260        ///    Handle sql notifications
261        ///    </summary>
262        protected virtual void onSqlRowsCopied(object sender, SqlRowsCopiedEventArgs e)
263        {
264            Debug("{0} rows    copied to database", e.RowsCopied);
265        }
266
267        ///    <summary>
268        /// Prepares the schema of the target table
269        /// </summary>
270        protected abstract void PrepareSchema();
271
272        /// <summary>
273        /// Creates the SQL bulk copy instance
274        /// </summary>
275        private SqlBulkCopy CreateSqlBulkCopy(SqlConnection connection, SqlTransaction transaction)
276        {
277            SqlBulkCopy copy = new SqlBulkCopy(connection, bulkCopyOptions, transaction);
278            copy.BatchSize = batchSize;
279            foreach (KeyValuePair<string, string> pair in Mappings)
280            {
281                copy.ColumnMappings.Add(pair.Key, pair.Value);
282            }
283            copy.NotifyAfter = NotifyBatchSize;
284            copy.SqlRowsCopied += onSqlRowsCopied;
285            copy.DestinationTableName = TargetTable;
286            copy.BulkCopyTimeout = Timeout;
287            return copy;
288        }
289
290        private void CompareSqlColumns(SqlConnection connection, SqlTransaction transaction, IEnumerable<Row> rows)
291        {
292            var command = connection.CreateCommand();
293            command.CommandText = "select * from {TargetTable} where 1=0".Replace("{TargetTable}", TargetTable);
294            command.CommandType = CommandType.Text;
295            command.Transaction = transaction;
296
297            using (var reader = command.ExecuteReader(CommandBehavior.KeyInfo))
298            {
299                var schemaTable = reader.GetSchemaTable();
300                var databaseColumns = schemaTable.Rows
301                    .OfType<DataRow>()
302                    .Select(r => new
303                    {
304                        Name = (string)r["ColumnName"],
305                        Type = (Type)r["DataType"],
306                        IsNullable = (bool)r["AllowDBNull"],
307                        MaxLength = (int)r["ColumnSize"]
308                    })
309                    .ToArray();
310
311                var missingColumns = _schema.Keys.Except(
312                    databaseColumns.Select(c => c.Name));
313                if (missingColumns.Any())
314                    throw new InvalidOperationException(
315                        "The following columns are not in the target table: " +
316                        string.Join(", ", missingColumns.ToArray()));
317                var differentColumns = _schema
318                    .Select(s => new
319                    {
320                        Name = s.Key,
321                        SchemaType = s.Value,
322                        DatabaseType = databaseColumns.Single(c => c.Name == s.Key)
323                    })
324                    .Where(c => !TypesMatch(c.SchemaType, c.DatabaseType.Type, c.DatabaseType.IsNullable));
325                if (differentColumns.Any())
326                    throw new InvalidOperationException(
327                        "The following columns have different types in the target table: " +
328                        string.Join(", ", differentColumns
329                            //.Select(c => $"{c.Name}: is {GetFriendlyName(c.SchemaType)}, but should be {GetFriendlyName(c.DatabaseType.Type)}{(c.DatabaseType.IsNullable ? "?" : "")}.")
330                            // c.Name, GetFriendlyName(c.SchemaType), GetFriendlyName(c.DatabaseType.Type), (c.DatabaseType.IsNullable ? \"?\" : \"\")
331                            .Select(c => string.Format("{0}: is {1}, but should be {2}{3}.", c.Name,
332                                GetFriendlyName(c.SchemaType), GetFriendlyName(c.DatabaseType.Type),
333                                (c.DatabaseType.IsNullable ? "?" : "")))
334                            .ToArray()
335                            ));
336                var stringsTooLong =
337                    (from column in databaseColumns
338                     where column.Type == typeof(string)
339                     from mapping in Mappings
340                     where mapping.Value == column.Name
341                     let name = mapping.Key
342                     from row in rows
343                     let value = (string)row[name]
344                     where value != null && value.Length > column.MaxLength
345                     select new { column.Name, column.MaxLength, Value = value })
346                    .ToArray();
347                if (stringsTooLong.Any())
348                    throw new InvalidOperationException(
349                        "The folowing columns have values too long for the target table: " +
350                        string.Join(", ", stringsTooLong
351                            .Select(s => "{s.Name}: max length is {s.MaxLength}, value is {s.Value}."
352                                .Replace("{s.Name}", s.Name)
353                                .Replace("{s.MaxLength}", s.MaxLength.ToString())
354                                .Replace("{s.Value}", s.Value)
355                            )
356                            .ToArray()));
357            }
358        }
359
360        private static string GetFriendlyName(Type type)
361        {
362            var friendlyName = type.Name;
363            if (!type.IsGenericType)
364                return friendlyName;
365
366            var iBacktick = friendlyName.IndexOf('`');
367            if (iBacktick > 0)
368                friendlyName = friendlyName.Remove(iBacktick);
369
370            var genericParameters = type.GetGenericArguments()
371                .Select(x => GetFriendlyName(x))
372                .ToArray();
373            friendlyName += "<" + string.Join(", ", genericParameters) + ">";
374
375            return friendlyName;
376        }
377
378        private bool TypesMatch(Type schemaType, Type databaseType, bool isNullable)
379        {
380            if (schemaType == databaseType)
381                return true;
382            if (isNullable && schemaType == typeof(Nullable<>).MakeGenericType(databaseType))
383                return true;
384            return false;
385        }
386    }
387}