PageRenderTime 51ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/tab2sql/Classes/Core/TSV2DataTable.cs

http://tab2sql.googlecode.com/
C# | 308 lines | 259 code | 15 blank | 34 comment | 20 complexity | 4e2d937dcbf9c9c08b63c36f2c0c03e8 MD5 | raw file
  1. using System;
  2. using System.Data;
  3. using System.Globalization;
  4. using DGen;
  5. using NLog;
  6. namespace Tab2Sql
  7. {
  8. internal partial class TSV2DataTable
  9. {
  10. private int currentRow;
  11. private static Logger logger = LogManager.GetCurrentClassLogger ();
  12. public System.Text.StringBuilder Diagnostics;
  13. public readonly string GlobalSeparator = PrepareText.GlobalDelimiter;
  14. public DataTable Dt;
  15. public Tab2SqlObj MyObject;
  16. /// <summary>
  17. /// This is
  18. /// </summary>
  19. public Tab2sqlOptions MyOptions;
  20. /// <summary>
  21. /// Main constructor
  22. /// </summary>
  23. /// <param name="input"></param>
  24. /// <param name="myO1"></param>
  25. /// <param name="myO2"></param>
  26. public TSV2DataTable(String input, Tab2sqlOptions myO1, Tab2SqlObj myO2)
  27. {
  28. Dt = new DataTable();
  29. currentRow = 0;
  30. MyOptions = myO1;
  31. MyObject = myO2;
  32. startText = input;
  33. Dt.Locale = CultureInfo.InvariantCulture;
  34. Convert2DT();
  35. }
  36. // PUBLIC METHODS
  37. // ----------------------------------------------------------------------------
  38. /// <summary>
  39. /// Takes block of text of n lines and puts DataTable into Dt object.
  40. /// </summary>
  41. /// <param name="input"></param>
  42. private void Convert2DT()
  43. {
  44. // Vars
  45. int i = 0;
  46. var preppy = new PrepareText(startText, MyOptions.Delimiter);
  47. Diagnostics = preppy.Diagnostics;
  48. startText = preppy.PreparedText.ToString();
  49. string[] Lines = startText.Split('\n');
  50. MyObject.RowCount = Convert.ToInt32(Lines.LongLength);
  51. MyObject.ColCount = preppy.EstimatedColumns;
  52. Dt.MinimumCapacity = Convert.ToInt32(Lines.LongLength);
  53. Dt.BeginLoadData();
  54. foreach (string Line in Lines)
  55. {
  56. MyOptions.IgnoreComments = true;
  57. if (MyOptions.IgnoreComments && Line.Length > 0 && (Line[0] == '#' || Line[0] == '$'))
  58. {
  59. // meh
  60. }
  61. else // process text, it's either the first row or it's not...
  62. {
  63. if (i == 0)
  64. {
  65. DoHeaderRow(Line.TrimEnd());
  66. }
  67. if (i > 0)
  68. {
  69. DoBodyRow(Line.TrimEnd());
  70. }
  71. currentRow = i;
  72. i++;
  73. }
  74. }
  75. Dt.AcceptChanges();
  76. Dt.EndLoadData();
  77. if (MyOptions.FirstRowIsHeader)
  78. {
  79. i--;
  80. }
  81. MyObject.RowCount = i;
  82. }
  83. // PRIVATE METHODS (MAJOR)
  84. // ----------------------------------------------------------------------------
  85. // i) DoHeaderRow(LineNumber, LineContent)
  86. // ii) DoBodyRow(LineNumber, LineContent)
  87. // Note : These methods are capitalised because they were publicly accessible in previous version.
  88. /// <summary>
  89. /// insert the header into the datatable
  90. /// </summary>
  91. /// <param name="Line"></param>
  92. ///
  93. private void DoHeaderRow(string line)
  94. {
  95. string colHeader = String.Empty;
  96. int q = 0; // q = column index
  97. fields = getFields(line);
  98. int columnGap = MyObject.ColCount - fields.Length;
  99. foreach (string columnName in fields)
  100. {
  101. if (MyOptions.FirstRowIsHeader == false || columnName.Length < 1)
  102. {
  103. colHeader = "[C100" + q + "]";
  104. }
  105. else if (MyOptions.FirstRowIsHeader && columnName.Length >= 1)
  106. {
  107. colHeader = Tab2Sql.Classes.Core.tsvHelper.columnNameAdjustment(columnName, MyOptions.columnNameModel).Trim();
  108. }
  109. // this check needed because SQL won't allow two columns of the same name
  110. addColumn(colHeader, q);
  111. q++;
  112. }
  113. if (columnGap > 0)
  114. {
  115. for (int i = 0; i < columnGap; i++)
  116. {
  117. addColumn("gapColumn", i);
  118. }
  119. }
  120. MyObject.ColCount = Math.Max(MyObject.ColCount, q);
  121. }
  122. /// <summary>
  123. /// insert a row into the datatable
  124. /// </summary>
  125. /// <param name="Line">Row contents</param>
  126. private void DoBodyRow (string line)
  127. {
  128. int q = 0;
  129. // column number
  130. int qColumnNumber = 0;
  131. // column number ignoring empty columns.
  132. if (line.Trim ().Length < 2)
  133. {
  134. return;
  135. }
  136. DataRow dr = Dt.NewRow ();
  137. DataTypes currentDType = DataTypes.unknown;
  138. fields = getFields (line);
  139. int ColCounter = fields.GetLength (0);
  140. foreach (string colString in fields)
  141. {
  142. char[] c = { '"', '#' };
  143. string colValue = colString.Trim ().Trim (c);
  144. if (colValue.Length > 0)
  145. {
  146. if (colValue == "NULL")
  147. {
  148. colValue = "";
  149. }
  150. currentDType = DTypes.GuessMyType (colValue);
  151. //MyObject.ValueTypes[qColumnNumber, currentRow] = currentDType;
  152. qColumnNumber++;
  153. if (currentDType == DataTypes.integer || currentDType == DataTypes.numeric)
  154. {
  155. colValue = DText.CleanNumeric (colValue);
  156. MyObject.Highest[q] = Math.Max (MyObject.Highest[q], Convert.ToDecimal (colValue));
  157. MyObject.Lowest[q] = Math.Min (MyObject.Lowest[q], Convert.ToDecimal (colValue));
  158. }
  159. // Work out precedence. Do we stick with existing best guess or use new one?
  160. MyObject.ColumnTypes[q] = DTypes.chooseDataType (currentDType, MyObject.ColumnTypes[q]);
  161. // Is this value longer than those that went before it ?
  162. MyObject.LongestLine[q] = Math.Max (colString.Trim ().Length, MyObject.LongestLine[q]);
  163. }
  164. try
  165. {
  166. if (colString != null && colString != "NULL")
  167. {
  168. if (colString.Length > 0)
  169. {
  170. if (currentDType == DataTypes.integer || currentDType == DataTypes.numeric)
  171. {
  172. dr[q] = DGen.DText.CleanValue (colString).Trim ();
  173. }
  174. else
  175. {
  176. if (dr[q] != null)
  177. {
  178. dr[q] = colString.Trim ().Trim (c);
  179. }
  180. }
  181. }
  182. }
  183. }
  184. catch (Exception ex)
  185. {
  186. logger.Error (ex.Message);
  187. //TODO:Some sort of error handler
  188. }
  189. q++;
  190. }
  191. MyObject.ColCount = Math.Max(qColumnNumber, MyObject.ColCount);
  192. Dt.Rows.Add(dr);
  193. dr.AcceptChanges();
  194. }
  195. protected virtual void Dispose(bool disposing)
  196. {
  197. if (disposing)
  198. {
  199. // dispose managed resources
  200. Dt.Clear();
  201. Dt.Dispose();
  202. }
  203. // free native resources
  204. }
  205. public void Dispose()
  206. {
  207. Dispose(true);
  208. GC.SuppressFinalize(this);
  209. }
  210. private string startText;
  211. private string[] fields;
  212. /// <summary>
  213. /// Get's an array of columns as split by the global separator.
  214. /// </summary>
  215. /// <param name="line"></param>
  216. /// <returns></returns>
  217. private string[] getFields(string line)
  218. {
  219. string[] tmpArray = System.Text.RegularExpressions.Regex.Split(line, GlobalSeparator);
  220. int items = tmpArray.Length;
  221. if (tmpArray[items - 1].Length < 1)
  222. {
  223. items--;
  224. }
  225. line = null;
  226. return (string[])DGen.DGen.ResizeArray(tmpArray, items);
  227. }
  228. // PRIVATE METHODS (MINOR)
  229. // ----------------------------------------------------------------------------------------------------
  230. // columnNameAdjustment
  231. // (straight forward, makes column names suitable for SQL use - exact handling depends on tab2sql.Options.ColumnNameHandling)
  232. private void addColumn(string colHeader, int q)
  233. {
  234. if (Dt.Columns.Contains(colHeader) == false)
  235. {
  236. Dt.Columns.Add(colHeader);
  237. }
  238. else
  239. {
  240. do
  241. {
  242. colHeader = "[CX" + q.ToString() + "]";
  243. q++;
  244. } while (Dt.Columns.Contains(colHeader));
  245. Dt.Columns.Add(colHeader);
  246. }
  247. }
  248. /// <summary>
  249. /// Possibly redundant (27th October 2009)
  250. /// </summary>
  251. /// <param name="column"></param>
  252. /// <returns></returns>
  253. private string columnNameAdjustment(string column)
  254. {
  255. var columnNameSettings = new HandlingOfColumnNames();
  256. columnNameSettings = MyOptions.columnNameModel;
  257. switch (columnNameSettings)
  258. {
  259. case HandlingOfColumnNames.NoChanges:
  260. // foo.
  261. break;
  262. case HandlingOfColumnNames.JustAddQuotes:
  263. column = "[" + column + "]";
  264. break;
  265. case HandlingOfColumnNames.Standard:
  266. column = DGen.DText.FixColumnHeader(column);
  267. break;
  268. default:
  269. break;
  270. }
  271. return column;
  272. }
  273. }
  274. }