PageRenderTime 57ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/Sources/DataAccess/DataTableBuilder.cs

https://github.com/tpwalke2/DataTable
C# | 234 lines | 114 code | 27 blank | 93 comment | 21 complexity | 400b1b56d8cfc5332f2e991db8e5f3d5 MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using System.IO;
  5. using System.Diagnostics;
  6. using System.Text.RegularExpressions;
  7. using System.Linq;
  8. namespace DataAccess
  9. {
  /// <summary>
  /// Marker type with no members of its own.
  /// Its only purpose is to serve as the receiver ("this" argument) on which
  /// table-creating extension methods are declared.
  /// </summary>
  public class DataTableBuilder
  {
  }
  16. /// <summary>
  17. /// Provide extension methods for creating tables.
  18. /// Use extension methods (instead of ctors) because they're discoverable and extendable.
  19. /// All extension methods should follow the convention:
  20. /// - returning a table.
  21. /// - Use "Lazy" if the table is not in-memory.
  22. ///
  23. /// Example usage:
  24. /// DataTable dt = DataTable.New.FromCsv()
  25. /// </summary>
  26. public static class DataTableBuilderExtensions
  27. {
  /// <summary>
  /// Load a whole CSV file into memory.
  /// After validating the filename this simply forwards to the filename-based Read overload.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="filename">path of the CSV file to load</param>
  /// <returns>a mutable in-memory DataTable for the given CSV file</returns>
  /// <exception cref="ArgumentNullException">thrown when filename is null</exception>
  public static MutableDataTable ReadCsv(this DataTableBuilder builder, string filename)
  {
      Debug.Assert(builder != null);

      if (filename != null)
      {
          return Read(builder, filename);
      }

      throw new ArgumentNullException("filename");
  }
  /// <summary>
  /// Load a file into memory.
  /// The schema is inferred from the header row. Biased to CSV, but may handle tab delimiters too.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="filename">path of the file to load</param>
  /// <returns>a new in-memory table produced by Reader.Read</returns>
  /// <exception cref="ArgumentNullException">thrown when filename is null</exception>
  public static MutableDataTable Read(this DataTableBuilder builder, string filename)
  {
      Debug.Assert(builder != null);

      if (filename == null)
      {
          throw new ArgumentNullException("filename");
      }

      MutableDataTable table = Reader.Read(filename);
      return table;
  }
  /// <summary>
  /// Load a table from a text stream into memory, using a comma as the delimiter.
  /// The schema is inferred from the header row.
  /// </summary>
  /// <param name="builder">ignored</param>
  /// <param name="stream">input stream to read from</param>
  /// <returns>a new in-memory table</returns>
  public static MutableDataTable Read(this DataTableBuilder builder, TextReader stream)
  {
      // Validation of the arguments happens in the delimiter-taking overload.
      const char commaDelimiter = ',';
      return Read(builder, stream, commaDelimiter);
  }
  /// <summary>
  /// Load a table from a text stream into memory.
  /// The schema is inferred from the header row.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="stream">input stream to read from</param>
  /// <param name="delimiter">delimiter character to use as the separator</param>
  /// <returns>a new in-memory table produced by Reader.Read</returns>
  /// <exception cref="ArgumentNullException">thrown when stream is null</exception>
  public static MutableDataTable Read(this DataTableBuilder builder, TextReader stream, char delimiter)
  {
      Debug.Assert(builder != null);

      if (stream != null)
      {
          return Reader.Read(stream, delimiter);
      }

      throw new ArgumentNullException("stream");
  }
  /// <summary>
  /// Produce a mutable in-memory copy of an existing data table.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="source">table to copy</param>
  /// <returns>a new table obtained from Utility.ToMutable</returns>
  /// <exception cref="ArgumentNullException">thrown when source is null</exception>
  public static MutableDataTable GetMutableCopy(this DataTableBuilder builder, DataTable source)
  {
      Debug.Assert(builder != null);

      if (source == null)
      {
          throw new ArgumentNullException("source");
      }

      MutableDataTable copy = Utility.ToMutable(source);
      return copy;
  }
  /// <summary>
  /// Return an in-memory table that contains the first 100 rows of the table stored in the given file.
  /// </summary>
  /// <param name="builder">ignored</param>
  /// <param name="filename">filename of table to load. Schema is inferred from header row.</param>
  /// <returns>an in-memory table containing the top 100 rows from the supplied file.</returns>
  public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, string filename)
  {
      // Argument validation is performed by the three-argument overload.
      const int defaultSampleSize = 100;
      return ReadSampleTopN(builder, filename, defaultSampleSize);
  }
  /// <summary>
  /// Return an in-memory table that contains the topN rows of the table stored in the given file.
  /// The file is wrapped in a StreamingDataTable and sampled via Analyze.SampleTopN.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="filename">filename of table to load. Schema is inferred from header row.</param>
  /// <param name="topN">number of leading rows to sample from the table.</param>
  /// <returns>an in-memory table containing the topN rows from the supplied file.</returns>
  /// <exception cref="ArgumentNullException">thrown when filename is null</exception>
  public static MutableDataTable ReadSampleTopN(this DataTableBuilder builder, string filename, int topN = 100)
  {
      Debug.Assert(builder != null);

      if (filename == null)
      {
          throw new ArgumentNullException("filename");
      }

      // Keep the local typed as DataTable so the sampler receives the same static type as before.
      DataTable streamed = new StreamingDataTable(filename);
      return Analyze.SampleTopN(streamed, topN);
  }
  /// <summary>
  /// Return a streaming data table over a file. This just reads a row at a time and avoids reading the whole
  /// table into memory. But it only provides sequential read-only access.
  /// The returned table's Name is set to the filename.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="filename">filename of CSV to read</param>
  /// <returns>a streaming data table for the given filename</returns>
  /// <exception cref="ArgumentNullException">thrown when filename is null</exception>
  public static DataTable ReadLazy(this DataTableBuilder builder, string filename)
  {
      Debug.Assert(builder != null);

      // Validate eagerly, like the other filename-based readers in this class,
      // so a null filename is reported here rather than wherever it is first used.
      if (filename == null)
      {
          throw new ArgumentNullException("filename");
      }

      return new StreamingDataTable(filename) { Name = filename };
  }
  /// <summary>
  /// Create an in-memory table with 2 columns (key and value), where each row is a KeyValuePair from the dictionary.
  /// Keys and values are converted to text with ToString(); a null key or value is stored as an empty string.
  /// </summary>
  /// <typeparam name="TKey">TKey of dictionary</typeparam>
  /// <typeparam name="TValue">TValue of dictionary</typeparam>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="dict">source of data</param>
  /// <param name="keyName">name for column that holds the dictionary keys</param>
  /// <param name="valName">name for column that holds the dictionary values</param>
  /// <returns>an in-memory table</returns>
  /// <exception cref="ArgumentNullException">thrown when dict is null</exception>
  public static MutableDataTable FromDictionary<TKey, TValue>(this DataTableBuilder builder, IDictionary<TKey, TValue> dict, string keyName, string valName)
  {
      Debug.Assert(builder != null);

      // Fail with a descriptive exception instead of a NullReferenceException on dict.Count.
      if (dict == null)
      {
          throw new ArgumentNullException("dict");
      }

      MutableDataTable d = new MutableDataTable();

      int count = dict.Count;
      Column cKeys = new Column(keyName, count);
      Column cVals = new Column(valName, count);
      d.Columns = new Column[] { cKeys, cVals };

      int i = 0;
      foreach (var kv in dict)
      {
          // Dictionaries may legitimately hold null values (and custom IDictionary
          // implementations may allow null keys); calling ToString() on them would throw.
          cKeys.Values[i] = kv.Key == null ? string.Empty : kv.Key.ToString();
          cVals.Values[i] = kv.Value == null ? string.Empty : kv.Value.ToString();
          i++;
      }

      return d;
  }
  /// <summary>
  /// Copy a 2d-dictionary into an in-memory table. This is ideal for creating a sparse table from a dictionary.
  /// Column names are inferred from key values.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="dict">2d-dictionary to convert; handed to Utility.ToTable</param>
  /// <returns>the in-memory table built by Utility.ToTable</returns>
  public static MutableDataTable From2dDictionary<TKeyRow, TKeyColumn, TValue>(this DataTableBuilder builder, Dictionary2d<TKeyRow, TKeyColumn, TValue> dict)
  {
      Debug.Assert(builder != null);

      MutableDataTable table = Utility.ToTable(dict);
      return table;
  }
  /// <summary>
  /// Create an in-memory table from an array of 2-tuples.
  /// Column names must be supplied because Tuple properties are just named Item1 and Item2.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="a">tuples to convert; handed to Utility.ToTable</param>
  /// <param name="columnName1">column name passed for the first tuple item</param>
  /// <param name="columnName2">column name passed for the second tuple item</param>
  /// <returns>the in-memory table built by Utility.ToTable</returns>
  public static MutableDataTable FromTuple<T1, T2>(this DataTableBuilder builder, Tuple<T1, T2>[] a, string columnName1, string columnName2)
  {
      Debug.Assert(builder != null);

      MutableDataTable table = Utility.ToTable(a, columnName1, columnName2);
      return table;
  }
  /// <summary>
  /// Create an in-memory table where each row is an item in the enumeration.
  /// The columns are from the "flattened" properties of the T (not fields).
  /// The column names are inferred from T via Utility.InferColumnNames.
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="a">items to convert into rows</param>
  /// <returns>the in-memory table built by Utility.ToTableX</returns>
  public static MutableDataTable FromEnumerable<T>(this DataTableBuilder builder, IEnumerable<T> a)
  {
      Debug.Assert(builder != null);

      return Utility.ToTableX<T>(a, Utility.InferColumnNames<T>());
  }
  /// <summary>
  /// Wrap the enumeration in a lazy table (an EnumerableDataTable over the items).
  /// </summary>
  /// <param name="builder">ignored (only asserted non-null in debug builds)</param>
  /// <param name="items">enumeration handed to the EnumerableDataTable constructor</param>
  /// <returns>a table backed by the supplied enumeration</returns>
  public static DataTable FromEnumerableLazy<T>(this DataTableBuilder builder, IEnumerable<T> items)
  {
      Debug.Assert(builder != null);

      DataTable lazyTable = new EnumerableDataTable<T>(items);
      return lazyTable;
  }
  206. }
  207. }