PageRenderTime 51ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/Sources/CsvReader.Azure/GenericTableWriter.cs

https://github.com/tpwalke2/DataTable
C# | 260 lines | 202 code | 32 blank | 26 comment | 49 complexity | 99b0fa233d308f58f66f41758c321092 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. using System.Data.Services.Client;
  6. using System.Xml.Linq;
  7. using System.Xml;
  8. using System.Data.Services.Common;
  9. using Microsoft.WindowsAzure;
  10. using Microsoft.WindowsAzure.StorageClient;
  11. using System.Text.RegularExpressions;
  12. namespace DataAccess
  13. {
  14. // Bridge between a DataTable Row and a AzureTable TableServiceEntity
  15. [DataServiceKey("PartitionKey", "RowKey")]
  16. internal class GenericWriterEntity : TableServiceEntity
  17. {
  18. // Use a custom write hook to convert from a Row to EDM properties.
  19. public Row _source;
  20. }
  21. internal class GenericTableWriter
  22. {
  23. private string[] _edmTypeNames;
  24. private string[] _columnNames;
  25. private static bool Compare(string a, string b)
  26. {
  27. return string.Compare(a, b, StringComparison.OrdinalIgnoreCase) == 0;
  28. }
  29. private static bool IsSpecialColumnName(string columnName)
  30. {
  31. // Case-insensitive compare
  32. return Compare(columnName, "PartitionKey") || Compare(columnName, "RowKey") || Compare(columnName, "Timestamp");
  33. }
  34. // $$$ Should be some common helper. This is protected on Row; but should be on table.
  35. private static int GetColumnIndex(string columnName, string[] columnNames)
  36. {
  37. for (int i = 0; i < columnNames.Length; i++)
  38. {
  39. if (Compare(columnNames[i], columnName))
  40. {
  41. return i;
  42. }
  43. }
  44. return -1;
  45. }
  46. // Azure table names are very restrictive, so sanity check upfront to give a useful error.
  47. // http://msdn.microsoft.com/en-us/library/windowsazure/dd179338.aspx
  48. private static void ValidateAzureTableName(string tableName)
  49. {
  50. if (!Regex.IsMatch(tableName, "^[A-Za-z][A-Za-z0-9]{2,62}$"))
  51. {
  52. throw new InvalidOperationException(string.Format("{0} is not a valid name for an azure table", tableName));
  53. }
  54. }
  55. // Get a function that will determine the partition row key
  56. private static Func<int, Row, ParitionRowKey> GetPartitionRowKeyFunc(string[] columnNames)
  57. {
  58. // If incoming table has columns named "PartitionKey" and "RowKey", then use those.
  59. int iPartitionKey = GetColumnIndex("PartitionKey", columnNames);
  60. int iRowKey = GetColumnIndex("RowKey", columnNames);
  61. if (iPartitionKey >= 0 && iRowKey >= 0)
  62. {
  63. // Both row and partition key
  64. return (rowIndex, row) => new ParitionRowKey(row.Values[iPartitionKey], row.Values[iRowKey]);
  65. }
  66. else if ((iPartitionKey < 0) && (iRowKey >= 0))
  67. {
  68. // Only row Key
  69. return (rowIndex, row) => new ParitionRowKey("1", row.Values[iRowKey]);
  70. }
  71. else if ((iPartitionKey >= 0) && (iRowKey < 0))
  72. {
  73. // Only a partition key
  74. return (rowIndex, row) => new ParitionRowKey(row.Values[iPartitionKey], rowIndex);
  75. }
  76. else
  77. {
  78. // format rowkey so that when sorted alpanumerically, it's still ascending
  79. return (rowIndex, row) => new ParitionRowKey("1", rowIndex);
  80. }
  81. }
  82. // Write a DataTable to an AzureTable.
  83. // DataTable's Rows are an unstructured property bag.
  84. // columnTypes - type of the column, or null if column should be skipped. Length of columnTypes should be the same as number of columns.
  85. public static void SaveToAzureTable(DataTable table, CloudStorageAccount account, string tableName, Type[] columnTypes, Func<int, Row, ParitionRowKey> funcComputeKeys)
  86. {
  87. if (table == null)
  88. {
  89. throw new ArgumentNullException("table");
  90. }
  91. if (account == null)
  92. {
  93. throw new ArgumentNullException("account");
  94. }
  95. if (columnTypes == null)
  96. {
  97. throw new ArgumentNullException("columnTypes");
  98. }
  99. if (tableName == null)
  100. {
  101. throw new ArgumentNullException("tableName");
  102. }
  103. ValidateAzureTableName(tableName);
  104. // Azure tables have "special" columns.
  105. // We can skip these by settings columnType[i] to null, which means don't write that column
  106. string[] columnNames = table.ColumnNames.ToArray();
  107. if (columnNames.Length != columnTypes.Length)
  108. {
  109. throw new ArgumentException(string.Format("columnTypes should have {0} elements", columnNames.Length), "columnTypes");
  110. }
  111. columnTypes = columnTypes.ToArray(); // create a copy for mutation.
  112. for (int i = 0; i < columnNames.Length; i++)
  113. {
  114. if (IsSpecialColumnName(columnNames[i]))
  115. {
  116. columnTypes[i] = null;
  117. }
  118. }
  119. if (funcComputeKeys == null)
  120. {
  121. funcComputeKeys = GetPartitionRowKeyFunc(columnNames);
  122. }
  123. // Validate columnTypes
  124. string [] edmTypeNames = Array.ConvertAll(columnTypes,
  125. columnType => {
  126. if (columnType == null)
  127. {
  128. return null;
  129. }
  130. string edmTypeName;
  131. _edmNameMapping.TryGetValue(columnType, out edmTypeName);
  132. if (edmTypeName == null)
  133. {
  134. // Unsupported type!
  135. throw new InvalidOperationException(string.Format("Type '{0}' is not a supported type on azure tables", columnType.FullName));
  136. }
  137. return edmTypeName;
  138. });
  139. CloudTableClient tableClient = account.CreateCloudTableClient();
  140. tableClient.DeleteTableIfExist(tableName);
  141. tableClient.CreateTableIfNotExist(tableName);
  142. GenericTableWriter w = new GenericTableWriter
  143. {
  144. _edmTypeNames = edmTypeNames,
  145. _columnNames = table.ColumnNames.ToArray()
  146. };
  147. // Batch rows for performance,
  148. // but all rows in the batch must have the same partition key
  149. TableServiceContext ctx = null;
  150. string lastPartitionKey = null;
  151. int rowCounter = 0;
  152. int batchSize = 0;
  153. foreach (Row row in table.Rows)
  154. {
  155. GenericWriterEntity entity = new GenericWriterEntity { _source = row };
  156. // Compute row and partition keys too.
  157. var partRow = funcComputeKeys(rowCounter, row);
  158. entity.PartitionKey = partRow.PartitionKey;
  159. entity.RowKey = partRow.RowKey;
  160. rowCounter++;
  161. // but all rows in the batch must have the same partition key
  162. if ((ctx != null) && (lastPartitionKey != null) && (lastPartitionKey != entity.PartitionKey))
  163. {
  164. ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
  165. ctx = null;
  166. }
  167. if (ctx == null)
  168. {
  169. lastPartitionKey = null;
  170. ctx = tableClient.GetDataServiceContext();
  171. ctx.WritingEntity += new EventHandler<ReadingWritingEntityEventArgs>(w.ctx_WritingEntity);
  172. batchSize = 0;
  173. }
  174. // Add enty to the current batch
  175. ctx.AddObject(tableName, entity);
  176. lastPartitionKey = entity.PartitionKey;
  177. batchSize++;
  178. if (batchSize % 50 == 0)
  179. {
  180. ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
  181. ctx = null;
  182. }
  183. }
  184. if (ctx != null)
  185. {
  186. ctx.SaveChangesWithRetries(SaveChangesOptions.Batch | SaveChangesOptions.ReplaceOnUpdate);
  187. }
  188. }
  189. private void ctx_WritingEntity(object sender, ReadingWritingEntityEventArgs args)
  190. {
  191. GenericWriterEntity entity = args.Entity as GenericWriterEntity;
  192. if (entity == null)
  193. {
  194. return;
  195. }
  196. XElement properties = args.Data.Descendants(GenericTableReader.MetadataNamespace + "properties").First();
  197. for(int iColumnn = 0; iColumnn < _edmTypeNames.Length; iColumnn++)
  198. {
  199. string edmTypeName = _edmTypeNames[iColumnn];
  200. if (edmTypeName == null)
  201. {
  202. continue;
  203. }
  204. string value = entity._source.Values[iColumnn];
  205. string columnName = _columnNames[iColumnn];
  206. // framework will handle row + partition keys.
  207. XElement e = new XElement(GenericTableReader.DataNamespace + columnName, value);
  208. e.Add(new XAttribute(GenericTableReader.MetadataNamespace + "type", edmTypeName));
  209. properties.Add(e);
  210. }
  211. }
  212. // Mapping of .NET types to EDM types.
  213. static Dictionary<Type, string> _edmNameMapping = new Dictionary<Type, string> {
  214. { typeof(string), "Edm.String" },
  215. { typeof(byte), "Edm.Byte" },
  216. { typeof(sbyte), "Edm.SByte" },
  217. { typeof(short), "Edm.Int16" },
  218. { typeof(int), "Edm.Int32" },
  219. { typeof(long), "Edm.Int64" },
  220. { typeof(double), "Edm.Double" },
  221. { typeof(float), "Edm.Single" },
  222. { typeof(bool), "Edm.Boolean" },
  223. { typeof(decimal), "Edm.Decimal" },
  224. { typeof(DateTime), "Edm.DateTime" },
  225. { typeof(Guid), "Edm.Guid" }
  226. };
  227. }
  228. }