PageRenderTime 19ms CodeModel.GetById 9ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/Sources/CsvReader.Azure/GenericTableWriter.cs

https://github.com/tpwalke2/DataTable
C# | 260 lines | 202 code | 32 blank | 26 comment | 49 complexity | 99b0fa233d308f58f66f41758c321092 MD5 | raw file
  1using System;
  2using System.Collections.Generic;
  3using System.Linq;
  4using System.Text;
  5using System.Data.Services.Client;
  6using System.Xml.Linq;
  7using System.Xml;
  8using System.Data.Services.Common;
  9using Microsoft.WindowsAzure;
 10using Microsoft.WindowsAzure.StorageClient;
 11using System.Text.RegularExpressions;
 12
 13namespace DataAccess
 14{
 15    // Bridge between a DataTable Row and a AzureTable TableServiceEntity
 16    [DataServiceKey("PartitionKey", "RowKey")]
 17    internal class GenericWriterEntity : TableServiceEntity
 18    {
 19        // Use a custom write hook to convert from a Row to EDM properties.
 20        public Row _source;        
 21    }
 22
 23    internal class GenericTableWriter
 24    {
 25        private string[] _edmTypeNames;
 26        private string[] _columnNames;
 27
 28        private static bool Compare(string a, string b)
 29        {
 30            return string.Compare(a, b, StringComparison.OrdinalIgnoreCase) == 0;
 31        }
 32
 33        private static bool IsSpecialColumnName(string columnName)
 34        { 
 35            // Case-insensitive compare
 36            return Compare(columnName, "PartitionKey") || Compare(columnName, "RowKey") || Compare(columnName, "Timestamp");
 37        }
 38
 39        // $$$ Should be some common helper. This is protected on Row; but should be on table. 
 40        private static int GetColumnIndex(string columnName, string[] columnNames)
 41        {
 42            for (int i = 0; i < columnNames.Length; i++)
 43            {
 44                if (Compare(columnNames[i], columnName))
 45                {
 46                    return i;
 47                }
 48            }
 49            return -1;
 50        }
 51
 52        // Azure table names are very restrictive, so sanity check upfront to give a useful error.
 53        // http://msdn.microsoft.com/en-us/library/windowsazure/dd179338.aspx
 54        private static void ValidateAzureTableName(string tableName)
 55        {
 56            if (!Regex.IsMatch(tableName, "^[A-Za-z][A-Za-z0-9]{2,62}$"))
 57            {
 58                throw new InvalidOperationException(string.Format("{0} is not a valid name for an azure table", tableName));
 59            }
 60        }
 61
 62        // Get a function that will determine the partition row key
 63        private static Func<int, Row, ParitionRowKey> GetPartitionRowKeyFunc(string[] columnNames)
 64        { 
 65            // If incoming table has columns named "PartitionKey" and "RowKey", then use those. 
 66            int iPartitionKey = GetColumnIndex("PartitionKey", columnNames);
 67            int iRowKey = GetColumnIndex("RowKey", columnNames);
 68            if (iPartitionKey >= 0 && iRowKey  >= 0)
 69            {
 70                // Both row and partition key
 71                return (rowIndex, row) => new ParitionRowKey(row.Values[iPartitionKey], row.Values[iRowKey]);
 72            }
 73            else if ((iPartitionKey < 0) && (iRowKey >= 0))
 74            {
 75                // Only row Key
 76                return (rowIndex, row) => new ParitionRowKey("1", row.Values[iRowKey]);
 77            }
 78            else if ((iPartitionKey >= 0) && (iRowKey < 0))
 79            {
 80                // Only a partition key
 81                return (rowIndex, row) => new ParitionRowKey(row.Values[iPartitionKey], rowIndex);
 82            }
 83            else
 84            {                    
 85                // format rowkey so that when sorted alpanumerically, it's still ascending
 86                return (rowIndex, row) => new ParitionRowKey("1", rowIndex);
 87            }            
 88        }
 89
 90        // Write a DataTable to an AzureTable.
 91        // DataTable's Rows are an unstructured property bag.
 92        // columnTypes - type of the column, or null if column should be skipped. Length of columnTypes should be the same as number of columns.
 93        public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, ParitionRowKey> funcComputeKeys)
 94        {
 95            if (table == null)
 96            {
 97                throw new ArgumentNullException("table");
 98            }
 99            if (account == null)
100            {
101                throw new ArgumentNullException("account");
102            }
103            if (columnTypes == null)
104            {
105                throw new ArgumentNullException("columnTypes");
106            }
107            if (tableName == null)
108            {
109                throw new ArgumentNullException("tableName");
110            }
111            ValidateAzureTableName(tableName);
112
113            // Azure tables have "special" columns. 
114            // We can skip these by settings columnType[i] to null, which means don't write that column
115            string[] columnNames = table.ColumnNames.ToArray();
116            if (columnNames.Length != columnTypes.Length)
117            {
118                throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
119            }
120
121            columnTypes = columnTypes.ToArray(); // create a copy for mutation. 
122            for (int i = 0; i < columnNames.Length; i++)
123            {
124                if (IsSpecialColumnName(columnNames[i]))
125                {
126                    columnTypes[i] = null;
127                }
128            }
129
130            if (funcComputeKeys == null)
131            {
132                funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
133            }
134
135            // Validate columnTypes 
136            string [] edmTypeNames = Array.ConvertAll(columnTypes, 
137                 columnType => {
138                     if (columnType == null)
139                     {
140                         return null;
141                     }
142                     string edmTypeName;
143                     _edmNameMapping.TryGetValue(columnType, out edmTypeName);
144                     if (edmTypeName == null)
145                     {
146                         // Unsupported type!
147                         throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
148                     }
149                     return edmTypeName;
150                 });
151
152
153            CloudTableClient tableClient = account.CreateCloudTableClient();
154
155            tableClient.DeleteTableIfExist(tableName);
156            tableClient.CreateTableIfNotExist(tableName);
157            
158            
159            GenericTableWriter w = new GenericTableWriter 
160            {
161                _edmTypeNames = edmTypeNames,
162                _columnNames = table.ColumnNames.ToArray()
163            };
164            
165            // Batch rows for performance, 
166            // but all rows in the batch must have the same partition key
167            TableServiceContext ctx = null;
168            string lastPartitionKey = null;
169
170            int rowCounter = 0;
171            int batchSize = 0;
172            foreach (Row row in table.Rows)
173            {
174                GenericWriterEntity entity = new GenericWriterEntity { _source = row };
175                // Compute row and partition keys too. 
176                var partRow = funcComputeKeys(rowCounter, row);
177                entity.PartitionKey = partRow.PartitionKey;
178                entity.RowKey = partRow.RowKey;
179                rowCounter++;
180
181                // but all rows in the batch must have the same partition key
182                if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
183                {
184                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
185                    ctx = null;
186                }                
187                
188                if (ctx == null)
189                {
190                    lastPartitionKey = null;
191                    ctx = tableClient.GetDataServiceContext();
192                    ctx.WritingEntity += new EventHandler<ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
193                    batchSize = 0;
194                }
195
196                // Add enty to the current batch
197                ctx.AddObject(tableName, entity);
198                lastPartitionKey = entity.PartitionKey;
199                batchSize++;
200                                
201                if (batchSize % 50 == 0)
202                {
203                    ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
204                    ctx = null;
205                }
206            }
207
208            if (ctx != null)
209            {
210                ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
211            }
212        }
213
214        private void ctx_WritingEntity(object sender, ReadingWritingEntityEventArgs args)
215        {
216            GenericWriterEntity entity = args.Entity as GenericWriterEntity;
217            if (entity == null)
218            {
219                return;
220            }
221
222            XElement properties = args.Data.Descendants(GenericTableReader.MetadataNamespace + "properties").First();
223
224            for(int iColumnn = 0; iColumnn < _edmTypeNames.Length; iColumnn++)
225            {
226                string edmTypeName = _edmTypeNames[iColumnn];
227                if (edmTypeName == null)
228                {
229                    continue;
230                }
231
232                string value = entity._source.Values[iColumnn];                
233                string columnName = _columnNames[iColumnn];
234
235                // framework will handle row + partition keys. 
236                XElement e = new XElement(GenericTableReader.DataNamespace + columnName, value);
237                e.Add(new XAttribute(GenericTableReader.MetadataNamespace + "type", edmTypeName));
238
239                properties.Add(e);
240            }            
241        }
242     
243        // Mapping of .NET types to EDM types.
244        static Dictionary<Type, string> _edmNameMapping = new Dictionary<Type, string> { 
245            { typeof(string), "Edm.String" },
246            { typeof(byte), "Edm.Byte" },
247            { typeof(sbyte), "Edm.SByte" },
248            { typeof(short), "Edm.Int16" },
249            { typeof(int), "Edm.Int32" },
250            { typeof(long), "Edm.Int64" },
251            { typeof(double), "Edm.Double" }, 
252            { typeof(float), "Edm.Single" },
253            { typeof(bool), "Edm.Boolean" },
254            { typeof(decimal), "Edm.Decimal" },
255            { typeof(DateTime), "Edm.DateTime" },
256            { typeof(Guid), "Edm.Guid" }
257        };
258
259    }
260}