PageRenderTime 44ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/Sources/CsvReader.Excel/ExcelExtensions.cs

https://github.com/tpwalke2/DataTable
C# | 291 lines | 187 code | 38 blank | 66 comment | 27 complexity | 6b495a9c9a149c0d77798bbd37645f4c MD5 | raw file
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using DocumentFormat.OpenXml.Packaging;
  5. using DocumentFormat.OpenXml.Spreadsheet;
  6. using System.IO;
  7. namespace DataAccess
  8. {
  9. public static class ExcelExtensions
  10. {
  11. /// <summary>
  12. /// Reads the first worksheet in the .xlsx file and returns it. This only supports .xlsx files (Office 2007, with open xml standard)
  13. /// and not .xls files (which had a closed file format that required COM).
  14. /// Also supports reading .csv files.
  15. /// This is safe to use on a server.
  16. /// </summary>
  17. /// <param name="builder"></param>
  18. /// <param name="filename">filename </param>
  19. /// <returns>table for the first sheet in the workbook. Table's name is the sheet name.</returns>
  20. public static MutableDataTable ReadExcel(this DataTableBuilder builder, string filename)
  21. {
  22. // For convenience
  23. if (filename.EndsWith(".csv", StringComparison.OrdinalIgnoreCase))
  24. {
  25. return builder.ReadCsv(filename);
  26. }
  27. using (Stream input = new FileStream(filename, FileMode.Open, FileAccess.Read))
  28. {
  29. return builder.ReadExcel(input);
  30. }
  31. }
  32. /// <summary>
  33. /// Reads the first worksheet in the .xlsx file and returns it. This only supports .xlsx files (Office 2007, with open xml standard)
  34. /// and not .xls files (which had a closed file format that required COM).
  35. /// This is safe to use on a server.
  36. /// </summary>
  37. /// <param name="builder"></param>
  38. /// <param name="input">stream to read file from</param>
  39. /// <returns>table for the first sheet in the workbook. Table's name is the sheet name.</returns>
  40. public static MutableDataTable ReadExcel(this DataTableBuilder builder, Stream input)
  41. {
  42. // See http://msdn.microsoft.com/en-us/library/hh298534.aspx
  43. using (SpreadsheetDocument document = SpreadsheetDocument.Open(input, isEditable: false))
  44. {
  45. // Retrieve a reference to the workbook part.
  46. WorkbookPart wbPart = document.WorkbookPart;
  47. // Get the first sheet
  48. foreach (Sheet sheet in wbPart.Workbook.Descendants<Sheet>())
  49. {
  50. MutableDataTable dt = ReadSheet(wbPart, sheet);
  51. if (dt != null)
  52. {
  53. return dt;
  54. }
  55. }
  56. }
  57. throw new InvalidOperationException("Excel file is either empty or does not have a valid table in it.");
  58. }
  59. /// <summary>
  60. /// Reads all sheets in the excel workbook and returns as a ordered collection of data tables.
  61. /// </summary>
  62. /// <param name="builder">placeholder</param>
  63. /// <param name="filename">excel file to load</param>
  64. /// <returns>Ordered collection of tables corresponding to non-empty sheets. Table name corresponds to sheet name.</returns>
  65. public static IList<MutableDataTable> ReadExcelAllSheets(this DataTableBuilder builder, string filename)
  66. {
  67. using (Stream input = new FileStream(filename, FileMode.Open, FileAccess.Read))
  68. {
  69. return builder.ReadExcelAllSheets(input);
  70. }
  71. }
  72. /// <summary>
  73. /// Reads all sheets in the excel workbook and returns as a ordered collection of data tables.
  74. /// </summary>
  75. /// <param name="builder">placeholder</param>
  76. /// <param name="input">stream to read from</param>
  77. /// <returns>Ordered collection of tables corresponding to non-empty sheets. Table name corresponds to sheet name.</returns>
  78. public static IList<MutableDataTable> ReadExcelAllSheets(this DataTableBuilder builder, Stream input)
  79. {
  80. List<MutableDataTable> list = new List<MutableDataTable>();
  81. // See http://msdn.microsoft.com/en-us/library/hh298534.aspx
  82. using (SpreadsheetDocument document = SpreadsheetDocument.Open(input, isEditable: false))
  83. {
  84. // Retrieve a reference to the workbook part.
  85. WorkbookPart wbPart = document.WorkbookPart;
  86. // Get the first sheet
  87. foreach (Sheet sheet in wbPart.Workbook.Descendants<Sheet>())
  88. {
  89. MutableDataTable dt = ReadSheet(wbPart, sheet);
  90. if (dt != null)
  91. {
  92. list.Add(dt);
  93. }
  94. }
  95. }
  96. return list;
  97. }
  98. // Read the excel sheet from the workbook and return as a data table.
  99. // Return null if sheet is empty.
  100. private static MutableDataTable ReadSheet(WorkbookPart wbPart, Sheet sheet)
  101. {
  102. string sheetName = sheet.Name.Value;
  103. // Retrieve a reference to the worksheet part.
  104. WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(sheet.Id));
  105. IEnumerable<Cell> cells = wsPart.Worksheet.Descendants<Cell>();
  106. Dictionary2d<int, int, string> vals = new Dictionary2d<int, int, string>();
  107. foreach (Cell c in cells)
  108. {
  109. var val = CellToText(wbPart, c);
  110. var loc = c.CellReference;
  111. var loc2 = ParseRef(loc);
  112. int columnId = loc2.Item1;
  113. int rowId = loc2.Item2;
  114. vals[rowId, columnId] = val;
  115. }
  116. if (vals.Count > 0)
  117. {
  118. MutableDataTable dt = ToTable(vals);
  119. dt.Name = sheetName;
  120. return dt;
  121. }
  122. return null;
  123. }
  124. // skip access
  125. private static MutableDataTable ToTable<TValue>(Dictionary2d<int, int, TValue> dict)
  126. {
  127. // TKey1 is rows, TKey2 is values.
  128. MutableDataTable d = new MutableDataTable();
  129. var rows = dict.Key1;
  130. int count = rows.Count() - 1;
  131. // Set columns
  132. var columns = dict.Key2.ToArray();
  133. {
  134. Column[] cs = new Column[columns.Length];
  135. for (int ic = 0; ic < columns.Length; ic++)
  136. {
  137. string columnName = dict[0, columns[ic]].ToString(); ;
  138. cs[ic] = new Column(columnName, count);
  139. }
  140. d.Columns = cs;
  141. }
  142. // Add rows
  143. int i = 0;
  144. foreach (var row in rows)
  145. {
  146. i++;
  147. if (i == 1)
  148. {
  149. continue; // skip 1st row, header
  150. }
  151. for (int ic = 0; ic < columns.Length; ic++)
  152. {
  153. var value = dict[row, columns[ic]];
  154. string s = (value == null) ? string.Empty : value.ToString();
  155. d.Columns[ic].Values[i - 2] = s;
  156. }
  157. }
  158. return d;
  159. }
  160. // This function from:
  161. // http://msdn.microsoft.com/en-us/library/hh298534.aspx
  162. static string CellToText(WorkbookPart wbPart, Cell theCell)
  163. {
  164. // If the cell does not exist, return an empty string.
  165. if (theCell == null)
  166. {
  167. return string.Empty;
  168. }
  169. string value = theCell.InnerText;
  170. // If the cell represents an integer number, you are done.
  171. // For dates, this code returns the serialized value that
  172. // represents the date. The code handles strings and
  173. // Booleans individually. For shared strings, the code
  174. // looks up the corresponding value in the shared string
  175. // table. For Booleans, the code converts the value into
  176. // the words TRUE or FALSE.
  177. if (theCell.DataType != null)
  178. {
  179. switch (theCell.DataType.Value)
  180. {
  181. case CellValues.SharedString:
  182. // For shared strings, look up the value in the
  183. // shared strings table.
  184. var stringTable =
  185. wbPart.GetPartsOfType<SharedStringTablePart>()
  186. .FirstOrDefault();
  187. // If the shared string table is missing, something
  188. // is wrong. Return the index that is in
  189. // the cell. Otherwise, look up the correct text in
  190. // the table.
  191. if (stringTable != null)
  192. {
  193. return stringTable.SharedStringTable.ElementAt(int.Parse(value)).InnerText;
  194. }
  195. break;
  196. case CellValues.Boolean:
  197. switch (value)
  198. {
  199. case "0":
  200. return "FALSE";
  201. default:
  202. return "TRUE";
  203. }
  204. } // end switch
  205. }
  206. // InnerText will show Table formulas. We want the actual computed value.
  207. if (theCell.CellValue == null)
  208. {
  209. // may happen if a cell is empty
  210. return string.Empty;
  211. }
  212. return theCell.CellValue.Text;
  213. }
  214. // Parse ref to loc,
  215. // Returns 0-based values (column, row)
  216. // "B3" --> (), "AA32" --> (1+26
  217. private static Tuple<int, int> ParseRef(string loc)
  218. {
  219. int column = 0;
  220. for (int idx = 0; idx < loc.Length; idx++)
  221. {
  222. char ch = loc[idx];
  223. int val = ConvertLetter(ch);
  224. if (val < 0)
  225. {
  226. // end of letter portion. Rest should be a number.
  227. string rest = loc.Substring(idx);
  228. int row = int.Parse(rest);
  229. return System.Tuple.Create(column - 1, row - 1);
  230. }
  231. else
  232. {
  233. column *= 26;
  234. column += (val + 1);
  235. }
  236. }
  237. // Error! there was no row value.
  238. throw new InvalidOperationException("illegal location value:" + loc);
  239. }
  240. private static int ConvertLetter(char ch)
  241. {
  242. if (ch >= 'A' && ch <= 'Z')
  243. {
  244. return ch - 'A';
  245. }
  246. if (ch >= 'a' && ch <= 'z')
  247. {
  248. return ch - 'a';
  249. }
  250. return -1;
  251. }
  252. }
  253. }