DataTable /Sources/CsvReader.Azure/GenericTableWriter.cs

Language C# Lines 260
MD5 Hash 99b0fa233d308f58f66f41758c321092
Repository https://github.com/tpwalke2/DataTable.git View Raw File
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data.Services.Client;
using System.Xml.Linq;
using System.Xml;
using System.Data.Services.Common;
using Microsoft.WindowsAzure;
using Microsoft.WindowsAzure.StorageClient;
using System.Text.RegularExpressions;

namespace DataAccess
{
    // Bridge between a DataTable Row and an Azure table TableServiceEntity.
    // GenericTableWriter.SaveToAzureTable creates one instance per row and assigns its
    // PartitionKey and RowKey; the remaining properties are not declared here but are
    // emitted at write time from _source by GenericTableWriter.ctx_WritingEntity.
    [DataServiceKey("PartitionKey", "RowKey")]
    internal class GenericWriterEntity : TableServiceEntity
    {
        // The row this entity was created from. The custom write hook reads
        // _source.Values to convert the Row into EDM properties.
        public Row _source;
    }

    internal class GenericTableWriter
    {
        // EDM type name for each column, indexed by column position.
        // A null entry means that column is skipped when an entity is written.
        private string[] _edmTypeNames;
        // Column names, parallel to _edmTypeNames; used as the property element names.
        private string[] _columnNames;

        // Returns true when a and b are equal under an ordinal, case-insensitive comparison.
        // Two nulls are considered equal; a null never equals a non-null string.
        private static bool Compare(string a, string b)
        {
            return string.Equals(a, b, StringComparison.OrdinalIgnoreCase);
        }

        // Reports whether columnName is one of "PartitionKey", "RowKey" or "Timestamp".
        // The match ignores case. SaveToAzureTable uses this to skip such columns when
        // writing regular properties.
        private static bool IsSpecialColumnName(string columnName)
        {
            if (Compare(columnName, "PartitionKey"))
            {
                return true;
            }
            if (Compare(columnName, "RowKey"))
            {
                return true;
            }
            return Compare(columnName, "Timestamp");
        }

        // $$$ Should be a common helper. The equivalent is protected on Row, but it belongs on the table.
        // Returns the zero-based position of the first entry in columnNames that equals
        // columnName (ignoring case), or -1 when there is no such entry.
        private static int GetColumnIndex(string columnName, string[] columnNames)
        {
            int position = 0;
            foreach (string candidate in columnNames)
            {
                if (Compare(candidate, columnName))
                {
                    return position;
                }
                position++;
            }
            return -1;
        }

        // Azure table names are very restrictive, so sanity check upfront to give a useful error.
        // The pattern accepts names of 3 to 63 characters made up of ASCII letters and digits
        // only, where the first character is a letter.
        // http://msdn.microsoft.com/en-us/library/windowsazure/dd179338.aspx
        //
        // tableName - candidate table name; must not be null.
        // Throws InvalidOperationException when the name does not match the pattern.
        private static void ValidateAzureTableName(string tableName)
        {
            // Anchor the end with \z rather than $: in .NET, $ also matches just before a
            // trailing '\n', which would let a name such as "abc\n" pass validation.
            if (!Regex.IsMatch(tableName, @"^[A-Za-z][A-Za-z0-9]{2,62}\z"))
            {
                throw new InvalidOperationException(string.Format("{0} is not a valid name for an azure table", tableName));
            }
        }

        // Builds the function that computes the partition/row key pair for a row.
        // Columns named "PartitionKey" and "RowKey" (matched ignoring case) supply the keys
        // when present. A missing partition key falls back to the constant "1", and a missing
        // row key falls back to the row index passed to the function.
        private static Func<int, Row, ParitionRowKey> GetPartitionRowKeyFunc(string[] columnNames)
        {
            int partitionColumn = GetColumnIndex("PartitionKey", columnNames);
            int rowColumn = GetColumnIndex("RowKey", columnNames);
            bool hasPartitionColumn = partitionColumn >= 0;
            bool hasRowColumn = rowColumn >= 0;

            if (hasPartitionColumn && hasRowColumn)
            {
                // Both keys come straight from the row.
                return (rowIndex, row) => new ParitionRowKey(row.Values[partitionColumn], row.Values[rowColumn]);
            }

            if (hasRowColumn)
            {
                // Row key only: every row lands in the same fixed partition.
                return (rowIndex, row) => new ParitionRowKey("1", row.Values[rowColumn]);
            }

            if (hasPartitionColumn)
            {
                // Partition key only: the row index serves as the row key.
                return (rowIndex, row) => new ParitionRowKey(row.Values[partitionColumn], rowIndex);
            }

            // Neither column exists: fixed partition, row index as the row key.
            // NOTE(review): presumably the (string, int) ParitionRowKey constructor formats the
            // index so that keys still sort ascending alphanumerically - confirm in ParitionRowKey.
            return (rowIndex, row) => new ParitionRowKey("1", rowIndex);
        }

        // Number of entities accumulated in a context before the batch is submitted.
        private const int MaxBatchSize = 50;

        // Write a DataTable to an AzureTable.
        // DataTable's Rows are an unstructured property bag.
        //
        // table - source table; its column names and rows are written.
        // account - storage account used to create the table client.
        // tableName - destination table name; must pass ValidateAzureTableName.
        // columnTypes - type of each column, or null if the column should be skipped. Length of
        //     columnTypes must equal the number of columns. The caller's array is not modified.
        // funcComputeKeys - computes the partition/row key from (row index, row). When null, a
        //     default is chosen by GetPartitionRowKeyFunc based on the column names.
        //
        // Throws ArgumentNullException if table, account, columnTypes or tableName is null.
        // Throws ArgumentException if columnTypes has the wrong length.
        // Throws InvalidOperationException if tableName is invalid or a column type has no EDM mapping.
        //
        // NOTE: any existing table with this name is deleted and recreated, so previous contents are lost.
        // NOTE(review): the service may need time to finish deleting a table - confirm that the
        // immediate CreateTableIfNotExist call tolerates a table that is still being deleted.
        public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, ParitionRowKey> funcComputeKeys)
        {
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (account == null)
            {
                throw new ArgumentNullException("account");
            }
            if (columnTypes == null)
            {
                throw new ArgumentNullException("columnTypes");
            }
            if (tableName == null)
            {
                throw new ArgumentNullException("tableName");
            }
            ValidateAzureTableName(tableName);

            // Azure tables have "special" columns.
            // We can skip these by setting columnTypes[i] to null, which means don't write that column.
            string[] columnNames = table.ColumnNames.ToArray();
            if (columnNames.Length != columnTypes.Length)
            {
                throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
            }

            columnTypes = columnTypes.ToArray(); // create a copy for mutation.
            for (int i = 0; i < columnNames.Length; i++)
            {
                if (IsSpecialColumnName(columnNames[i]))
                {
                    columnTypes[i] = null;
                }
            }

            if (funcComputeKeys == null)
            {
                funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
            }

            // Validate columnTypes and translate them to EDM type names before touching storage,
            // so an unsupported type fails before the existing table is deleted.
            string[] edmTypeNames = Array.ConvertAll(columnTypes,
                 columnType =>
                 {
                     if (columnType == null)
                     {
                         return null;
                     }
                     string edmTypeName;
                     if (!_edmNameMapping.TryGetValue(columnType, out edmTypeName) || edmTypeName == null)
                     {
                         // Unsupported type!
                         throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
                     }
                     return edmTypeName;
                 });

            CloudTableClient tableClient = account.CreateCloudTableClient();

            tableClient.DeleteTableIfExist(tableName);
            tableClient.CreateTableIfNotExist(tableName);

            // The writer instance carries the per-column metadata needed by the WritingEntity hook.
            GenericTableWriter w = new GenericTableWriter
            {
                _edmTypeNames = edmTypeNames,
                _columnNames = columnNames
            };

            // Batch rows for performance,
            // but all rows in the batch must have the same partition key.
            TableServiceContext ctx = null;
            string lastPartitionKey = null;

            int rowCounter = 0;
            int batchSize = 0;
            foreach (Row row in table.Rows)
            {
                GenericWriterEntity entity = new GenericWriterEntity { _source = row };
                // Compute row and partition keys too.
                var partRow = funcComputeKeys(rowCounter, row);
                entity.PartitionKey = partRow.PartitionKey;
                entity.RowKey = partRow.RowKey;
                rowCounter++;

                // A change of partition key closes the current batch.
                if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
                {
                    SaveBatch(ctx);
                    ctx = null;
                }

                if (ctx == null)
                {
                    // Start a fresh context (and therefore a fresh batch).
                    ctx = tableClient.GetDataServiceContext();
                    ctx.WritingEntity += new EventHandler<ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
                    batchSize = 0;
                }

                // Add entity to the current batch.
                ctx.AddObject(tableName, entity);
                lastPartitionKey = entity.PartitionKey;
                batchSize++;

                if (batchSize >= MaxBatchSize)
                {
                    SaveBatch(ctx);
                    ctx = null;
                }
            }

            // Submit whatever is left in the final, partially filled batch.
            if (ctx != null)
            {
                SaveBatch(ctx);
            }
        }

        // Submits all entities tracked by ctx as a single batch request.
        private static void SaveBatch(TableServiceContext ctx)
        {
            ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
        }

        // WritingEntity hook. For a GenericWriterEntity, appends one typed property element per
        // written column to the first "properties" element of the outgoing payload.
        // Entities of any other type are left untouched.
        private void ctx_WritingEntity(object sender, ReadingWritingEntityEventArgs args)
        {
            GenericWriterEntity writerEntity = args.Entity as GenericWriterEntity;
            if (writerEntity == null)
            {
                return;
            }

            XElement propertyBag = args.Data.Descendants(GenericTableReader.MetadataNamespace + "properties").First();
            XName typeAttributeName = GenericTableReader.MetadataNamespace + "type";

            int column = -1;
            foreach (string edmTypeName in _edmTypeNames)
            {
                column++;

                // A null type name marks a column that must not be written.
                if (edmTypeName == null)
                {
                    continue;
                }

                string cellValue = writerEntity._source.Values[column];
                string propertyName = _columnNames[column];

                // framework will handle row + partition keys.
                propertyBag.Add(
                    new XElement(
                        GenericTableReader.DataNamespace + propertyName,
                        new XAttribute(typeAttributeName, edmTypeName),
                        cellValue));
            }
        }
     
        // Mapping of .NET types to EDM type names.
        // SaveToAzureTable rejects any column type that has no entry here.
        // Declared readonly so the shared lookup table cannot be swapped out after initialization.
        // NOTE(review): the Azure Table service documentation lists only a subset of EDM primitive
        // types; confirm the service accepts Edm.Byte, Edm.SByte, Edm.Int16, Edm.Single and Edm.Decimal.
        private static readonly Dictionary<Type, string> _edmNameMapping = new Dictionary<Type, string> {
            { typeof(string), "Edm.String" },
            { typeof(byte), "Edm.Byte" },
            { typeof(sbyte), "Edm.SByte" },
            { typeof(short), "Edm.Int16" },
            { typeof(int), "Edm.Int32" },
            { typeof(long), "Edm.Int64" },
            { typeof(double), "Edm.Double" },
            { typeof(float), "Edm.Single" },
            { typeof(bool), "Edm.Boolean" },
            { typeof(decimal), "Edm.Decimal" },
            { typeof(DateTime), "Edm.DateTime" },
            { typeof(Guid), "Edm.Guid" }
        };

    }
}
Back to Top