PageRenderTime 47ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/csharp/kevint/CsvReader.cs

https://bitbucket.org/apauley/hollingberries
C# | 639 lines | 435 code | 71 blank | 133 comment | 65 complexity | 4e04fe4a97ef6405ddde359489b2af6c MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /* CSVReader - a simple open source C# class library to read CSV data
  2. * by Andrew Stellman - http://www.stellman-greene.com/CSVReader
  3. *
  4. * CSVReader.cs - Class to read CSV data from a string, file or stream
  5. *
  6. * download the latest version: http://svn.stellman-greene.com/CSVReader
  7. *
  8. * (c) 2008, Stellman & Greene Consulting
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without
  12. * modification, are permitted provided that the following conditions are met:
  13. * * Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * * Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. * * Neither the name of Stellman & Greene Consulting nor the
  19. * names of its contributors may be used to endorse or promote products
  20. * derived from this software without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY STELLMAN & GREENE CONSULTING ''AS IS'' AND ANY
  23. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  24. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  25. * DISCLAIMED. IN NO EVENT SHALL STELLMAN & GREENE CONSULTING BE LIABLE FOR ANY
  26. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  27. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  29. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  31. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. *
  33. */
  34. using System;
  35. using System.Collections.Generic;
  36. using System.IO;
  37. using System.Data;
  38. using System.Text;
  39. namespace Com.StellmanGreene.CSVReader
  40. {
  /// <summary>
  /// Reads CSV-formatted data from a file, a string or a TextReader, either one row
  /// at a time (<see cref="ReadRow"/>) or all at once into a DataTable
  /// (<see cref="CreateDataTable"/>).
  /// </summary>
  /// <remarks>
  /// Rows are terminated by a line feed; a carriage return immediately before the
  /// line feed is discarded. Line breaks inside quoted values are not supported.
  /// </remarks>
  public class CSVReader : IDisposable
  {
      /// <summary>
      /// Row terminator recognised by <see cref="ReadRow"/>
      /// </summary>
      public const string NEWLINE = "\n";

      /// <summary>
      /// This reader supplies all of the CSV text. A TextReader is used (rather than
      /// a BinaryReader) so that characters outside the Basic Multilingual Plane and
      /// a leading byte order mark are handled by the framework's decoder.
      /// </summary>
      private TextReader reader;

      /// <summary>
      /// The number of rows to scan for types when building a DataTable (0 to scan the whole file).
      /// NOTE: this value is stored but <see cref="CreateDataTable"/> does not consult it;
      /// every data row is always scanned.
      /// </summary>
      public int ScanRows = 0;

      #region Constructors

      /// <summary>
      /// Read CSV-formatted data from a file
      /// </summary>
      /// <param name="csvFileInfo">The CSV file to open for reading</param>
      /// <exception cref="ArgumentNullException">csvFileInfo is null</exception>
      public CSVReader(FileInfo csvFileInfo)
      {
          if (csvFileInfo == null)
              throw new ArgumentNullException("csvFileInfo", "Null FileInfo passed to CSVReader");

          // UTF-8 by default; a byte order mark, if present, is detected and skipped
          this.reader = new StreamReader(File.OpenRead(csvFileInfo.FullName), Encoding.UTF8);
      }

      /// <summary>
      /// Read CSV-formatted data from a string
      /// </summary>
      /// <param name="csvData">String containing CSV data</param>
      /// <exception cref="ArgumentNullException">csvData is null</exception>
      public CSVReader(string csvData)
      {
          if (csvData == null)
              throw new ArgumentNullException("csvData", "Null string passed to CSVReader");

          this.reader = new StringReader(csvData);
      }

      /// <summary>
      /// Read CSV-formatted data from a TextReader. The reader is consumed to its end
      /// immediately; it is not retained and is not closed by this class.
      /// </summary>
      /// <param name="reader">TextReader that's reading CSV-formatted data</param>
      /// <exception cref="ArgumentNullException">reader is null</exception>
      public CSVReader(TextReader reader)
      {
          if (reader == null)
              throw new ArgumentNullException("reader", "Null TextReader passed to CSVReader");

          this.reader = new StringReader(reader.ReadToEnd());
      }

      #endregion

      // The not-yet-consumed remainder of the row currently being parsed
      string currentLine = "";

      /// <summary>
      /// Read the next row from the CSV data
      /// </summary>
      /// <returns>
      /// A list of objects read from the row (empty for a blank line),
      /// or null if there is no next row
      /// </returns>
      /// <exception cref="FormatException">A quoted value in the row is never closed</exception>
      public List<object> ReadRow()
      {
          string line = ReadLineText();
          if (line == null)
              return null;

          currentLine = line;

          // Each ReadNextObject() call consumes one value (and its trailing comma)
          // from currentLine, so this loop always terminates.
          List<object> objects = new List<object>();
          while (currentLine != "")
              objects.Add(ReadNextObject());
          return objects;
      }

      /// <summary>
      /// Read characters up to (and consuming) the next NEWLINE or the end of the data
      /// </summary>
      /// <returns>The line without its terminator, or null if no characters remain</returns>
      private string ReadLineText()
      {
          int next = reader.Read();
          if (next < 0)
              return null;

          StringBuilder builder = new StringBuilder();
          while (next >= 0 && next != NEWLINE[0])
          {
              builder.Append((char)next);
              next = reader.Read();
          }

          // Files with CRLF line endings would otherwise leave '\r' glued to the last value
          if (builder.Length > 0 && builder[builder.Length - 1] == '\r')
              builder.Length--;

          return builder.ToString();
      }

      /// <summary>
      /// Read the next object from the currentLine string and remove it
      /// (together with the comma that follows it) from currentLine
      /// </summary>
      /// <returns>
      /// The next object in the currentLine string: always a string for a quoted value,
      /// otherwise whatever StringConverter.ConvertString produces for the raw text
      /// </returns>
      /// <exception cref="FormatException">A quoted value is never closed</exception>
      private object ReadNextObject()
      {
          if (currentLine == null)
              return null;

          if (currentLine.Length > 0 && currentLine[0] == '"')
              return ReadQuotedValue();

          // Unquoted values run up to the next comma or the end of the line
          int comma = currentLine.IndexOf(',');
          string raw;
          if (comma < 0)
          {
              raw = currentLine;
              currentLine = "";
          }
          else
          {
              raw = currentLine.Substring(0, comma);
              currentLine = currentLine.Substring(comma + 1);
          }

          object convertedValue;
          StringConverter.ConvertString(raw.Replace("\"\"", "\""), out convertedValue);
          return convertedValue;
      }

      /// <summary>
      /// Consume a quoted value from the start of currentLine. Inside the quotes a
      /// doubled quote stands for one literal quote; the value ends at a quote that
      /// is followed by a comma or by the end of the line.
      /// </summary>
      /// <returns>The unescaped text between the enclosing quotes</returns>
      /// <exception cref="FormatException">No closing quote was found</exception>
      private string ReadQuotedValue()
      {
          StringBuilder value = new StringBuilder();
          int len = currentLine.Length;
          int i = 1; // skip the opening quote

          while (i < len)
          {
              char c = currentLine[i];
              if (c != '"')
              {
                  value.Append(c);
                  i++;
              }
              else if (i + 1 < len && currentLine[i + 1] == '"')
              {
                  // Escaped quote
                  value.Append('"');
                  i += 2;
              }
              else if (i + 1 == len)
              {
                  // Closing quote at the end of the line
                  currentLine = "";
                  return value.ToString();
              }
              else if (currentLine[i + 1] == ',')
              {
                  // Closing quote followed by the separator
                  currentLine = currentLine.Substring(i + 2);
                  return value.ToString();
              }
              else
              {
                  // A lone quote in the middle of a value is kept literally
                  value.Append(c);
                  i++;
              }
          }

          throw new FormatException("Invalid CSV format: " + currentLine);
      }

      /// <summary>
      /// Read all remaining rows using repeated ReadRow() calls and build a DataTable
      /// whose column types are the common type of the values found in each column
      /// </summary>
      /// <param name="headerRow">True if the first row contains headers</param>
      /// <returns>System.Data.DataTable object populated with the row data</returns>
      public DataTable CreateDataTable(bool headerRow)
      {
          // Read the CSV data into rows
          List<List<object>> rows = new List<List<object>>();
          List<object> readRow;
          while ((readRow = ReadRow()) != null)
              rows.Add(readRow);

          // Read the column names from the header row (if there is one).
          // Empty input has no header row to read, so guard the rows[0] access.
          List<string> columnNames = new List<string>();
          if (headerRow && rows.Count > 0)
              foreach (object name in rows[0])
                  columnNames.Add(name.ToString());

          int firstDataRow = headerRow ? 1 : 0;

          // Work out each column's type from every data row
          List<Type> columnTypes = new List<Type>();
          for (int r = firstDataRow; r < rows.Count; r++)
          {
              List<object> row = rows[r];
              for (int i = 0; i < row.Count; i++)
              {
                  // If we're adding a new column to the columnTypes list, use its type.
                  // Otherwise, find the common type between the one that's there and the new row.
                  Type cellType = row[i].GetType();
                  if (i >= columnTypes.Count)
                      columnTypes.Add(cellType);
                  else
                      columnTypes[i] = StringConverter.FindCommonType(columnTypes[i], cellType);
              }
          }

          // Create the table and add the columns
          DataTable table = new DataTable();
          for (int i = 0; i < columnTypes.Count; i++)
          {
              table.Columns.Add();
              table.Columns[i].DataType = columnTypes[i];
              if (i < columnNames.Count)
                  table.Columns[i].ColumnName = columnNames[i];
          }

          // Add the data from the rows; cells missing from short rows stay unset
          for (int r = firstDataRow; r < rows.Count; r++)
          {
              List<object> row = rows[r];
              DataRow dataRow = table.NewRow();
              for (int i = 0; i < row.Count; i++)
                  dataRow[i] = row[i];
              table.Rows.Add(dataRow);
          }

          return table;
      }

      /// <summary>
      /// Read a CSV file into a table
      /// </summary>
      /// <param name="filename">Filename of CSV file</param>
      /// <param name="headerRow">True if the first row contains column names</param>
      /// <param name="scanRows">Value assigned to <see cref="ScanRows"/> before the table is built</param>
      /// <returns>System.Data.DataTable object that contains the CSV data</returns>
      public static DataTable ReadCSVFile(string filename, bool headerRow, int scanRows)
      {
          using (CSVReader reader = new CSVReader(new FileInfo(filename)))
          {
              reader.ScanRows = scanRows;
              return reader.CreateDataTable(headerRow);
          }
      }

      /// <summary>
      /// Read a CSV file into a table
      /// </summary>
      /// <param name="filename">Filename of CSV file</param>
      /// <param name="headerRow">True if the first row contains column names</param>
      /// <returns>System.Data.DataTable object that contains the CSV data</returns>
      public static DataTable ReadCSVFile(string filename, bool headerRow)
      {
          using (CSVReader reader = new CSVReader(new FileInfo(filename)))
              return reader.CreateDataTable(headerRow);
      }

      #region IDisposable Members

      /// <summary>
      /// Release the underlying reader (and, for file input, the open file)
      /// </summary>
      public void Dispose()
      {
          if (reader != null)
          {
              try
              {
                  reader.Dispose();
              }
              catch (Exception)
              {
                  // Deliberately best-effort: Dispose must not throw, and there is
                  // nothing useful a caller could do about a failure to close.
              }
          }
      }

      #endregion
  }
  /// <summary>
  /// Static class to convert strings to typed values and to find a type
  /// wide enough to hold values of two such types
  /// </summary>
  public static class StringConverter
  {
      // CSV is machine-readable data, so numbers are always parsed with the invariant
      // culture; otherwise "1.5" is read as 15 on systems where '.' groups digits.
      private static readonly System.Globalization.CultureInfo Invariant =
          System.Globalization.CultureInfo.InvariantCulture;

      // These are the styles the two-argument TryParse overloads use implicitly
      private const System.Globalization.NumberStyles IntegerStyle =
          System.Globalization.NumberStyles.Integer;
      private const System.Globalization.NumberStyles FloatStyle =
          System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
      private const System.Globalization.NumberStyles DecimalStyle =
          System.Globalization.NumberStyles.Number;

      /// <summary>
      /// Convert a string to the first type that can parse it, trying in order:
      /// byte, short, int, long, ulong, float, double, decimal, bool, char.
      /// If none of them parses the text, the string itself is returned.
      /// </summary>
      /// <param name="value">Text to convert</param>
      /// <param name="convertedValue">Receives the parsed value (boxed), or value itself</param>
      /// <returns>The type of the object stored in convertedValue (string when nothing parsed)</returns>
      public static Type ConvertString(string value, out object convertedValue)
      {
          // First check the whole number types, because floating point types will always parse whole numbers
          // Start with the smallest types
          byte byteResult;
          if (byte.TryParse(value, IntegerStyle, Invariant, out byteResult))
          {
              convertedValue = byteResult;
              return typeof (byte);
          }

          short shortResult;
          if (short.TryParse(value, IntegerStyle, Invariant, out shortResult))
          {
              convertedValue = shortResult;
              return typeof (short);
          }

          int intResult;
          if (int.TryParse(value, IntegerStyle, Invariant, out intResult))
          {
              convertedValue = intResult;
              return typeof (int);
          }

          long longResult;
          if (long.TryParse(value, IntegerStyle, Invariant, out longResult))
          {
              convertedValue = longResult;
              return typeof (long);
          }

          // No need to check the rest of the unsigned types, which will fit into the signed whole number types
          ulong ulongResult;
          if (ulong.TryParse(value, IntegerStyle, Invariant, out ulongResult))
          {
              convertedValue = ulongResult;
              return typeof (ulong);
          }

          // Next check the floating point types
          float floatResult;
          if (float.TryParse(value, FloatStyle, Invariant, out floatResult))
          {
              convertedValue = floatResult;
              return typeof (float);
          }

          double doubleResult;
          if (double.TryParse(value, FloatStyle, Invariant, out doubleResult))
          {
              convertedValue = doubleResult;
              return typeof (double);
          }

          decimal decimalResult;
          if (decimal.TryParse(value, DecimalStyle, Invariant, out decimalResult))
          {
              convertedValue = decimalResult;
              return typeof (decimal);
          }

          // It's not a number, so it's either a bool, char or string
          bool boolResult;
          if (bool.TryParse(value, out boolResult))
          {
              convertedValue = boolResult;
              return typeof (bool);
          }

          char charResult;
          if (char.TryParse(value, out charResult))
          {
              convertedValue = charResult;
              return typeof (char);
          }

          convertedValue = value;
          return typeof (string);
      }

      // Numeric types ordered from narrowest to widest. For two numeric types the
      // common type is the one with the higher rank, with one exception handled in
      // FindCommonType (a signed integer paired with ulong).
      private const int ShortRank = 1;
      private const int LongRank = 3;
      private const int ULongRank = 4;

      private static readonly Dictionary<Type, int> numericRank = new Dictionary<Type, int>()
      {
          {typeof (byte), 0},
          {typeof (short), ShortRank},
          {typeof (int), 2},
          {typeof (long), LongRank},
          {typeof (ulong), ULongRank},
          {typeof (float), 5},
          {typeof (double), 6},
          {typeof (decimal), 7},
      };

      /// <summary>
      /// Compare two types and find a type that can fit both of them
      /// </summary>
      /// <param name="typeA">First type to compare</param>
      /// <param name="typeB">Second type to compare</param>
      /// <returns>
      /// For two numeric types, the wider of the two (decimal when a signed integer
      /// type meets ulong, since neither of those can hold the other's full range);
      /// bool for two bools; char for two chars; string for every other combination,
      /// including types this class does not know about
      /// </returns>
      /// <exception cref="ArgumentNullException">typeA or typeB is null</exception>
      public static Type FindCommonType(Type typeA, Type typeB)
      {
          int rankA;
          int rankB;
          bool numericA = numericRank.TryGetValue(typeA, out rankA);
          bool numericB = numericRank.TryGetValue(typeB, out rankB);

          if (numericA && numericB)
          {
              int high = Math.Max(rankA, rankB);
              int low = Math.Min(rankA, rankB);

              // ulong cannot represent negative values and short/int/long cannot
              // represent the top half of ulong, so widen to decimal, which holds both
              if (high == ULongRank && low >= ShortRank && low <= LongRank)
                  return typeof (decimal);

              return rankA >= rankB ? typeA : typeB;
          }

          // bool and char are only compatible with themselves
          if (typeA == typeB && (typeA == typeof (bool) || typeA == typeof (char)))
              return typeA;

          return typeof (string);
      }
  }
  568. }