/tab2sql/Classes/Core/TSV2DataTable.cs
C# | 308 lines | 259 code | 15 blank | 34 comment | 20 complexity | 4e2d937dcbf9c9c08b63c36f2c0c03e8 MD5 | raw file
- using System;
- using System.Data;
- using System.Globalization;
- using DGen;
- using NLog;
-
- namespace Tab2Sql
- {
- internal partial class TSV2DataTable
- {
-
- private int currentRow;
-
- private static Logger logger = LogManager.GetCurrentClassLogger ();
- public System.Text.StringBuilder Diagnostics;
- public readonly string GlobalSeparator = PrepareText.GlobalDelimiter;
- public DataTable Dt;
- public Tab2SqlObj MyObject;
- /// <summary>
- /// This is
- /// </summary>
- public Tab2sqlOptions MyOptions;
-
- /// <summary>
- /// Main constructor
- /// </summary>
- /// <param name="input"></param>
- /// <param name="myO1"></param>
- /// <param name="myO2"></param>
- public TSV2DataTable(String input, Tab2sqlOptions myO1, Tab2SqlObj myO2)
- {
- Dt = new DataTable();
- currentRow = 0;
- MyOptions = myO1;
- MyObject = myO2;
- startText = input;
- Dt.Locale = CultureInfo.InvariantCulture;
- Convert2DT();
- }
-
- // PUBLIC METHODS
- // ----------------------------------------------------------------------------
- /// <summary>
- /// Takes block of text of n lines and puts DataTable into Dt object.
- /// </summary>
- /// <param name="input"></param>
- private void Convert2DT()
- {
- // Vars
- int i = 0;
- var preppy = new PrepareText(startText, MyOptions.Delimiter);
- Diagnostics = preppy.Diagnostics;
- startText = preppy.PreparedText.ToString();
- string[] Lines = startText.Split('\n');
-
- MyObject.RowCount = Convert.ToInt32(Lines.LongLength);
- MyObject.ColCount = preppy.EstimatedColumns;
- Dt.MinimumCapacity = Convert.ToInt32(Lines.LongLength);
-
- Dt.BeginLoadData();
-
- foreach (string Line in Lines)
- {
-
- MyOptions.IgnoreComments = true;
- if (MyOptions.IgnoreComments && Line.Length > 0 && (Line[0] == '#' || Line[0] == '$'))
- {
- // meh
- }
- else // process text, it's either the first row or it's not...
- {
- if (i == 0)
- {
- DoHeaderRow(Line.TrimEnd());
- }
- if (i > 0)
- {
- DoBodyRow(Line.TrimEnd());
- }
- currentRow = i;
- i++;
- }
- }
- Dt.AcceptChanges();
- Dt.EndLoadData();
-
- if (MyOptions.FirstRowIsHeader)
- {
- i--;
- }
- MyObject.RowCount = i;
-
- }
-
- // PRIVATE METHODS (MAJOR)
- // ----------------------------------------------------------------------------
- // i) DoHeaderRow(LineNumber, LineContent)
- // ii) DoBodyRow(LineNumber, LineContent)
- // Note : These methods are capitalised because they were publicly accessible in previous version.
- /// <summary>
- /// insert the header into the datatable
- /// </summary>
- /// <param name="Line"></param>
- ///
- private void DoHeaderRow(string line)
- {
- string colHeader = String.Empty;
- int q = 0; // q = column index
-
- fields = getFields(line);
- int columnGap = MyObject.ColCount - fields.Length;
- foreach (string columnName in fields)
- {
- if (MyOptions.FirstRowIsHeader == false || columnName.Length < 1)
- {
- colHeader = "[C100" + q + "]";
- }
- else if (MyOptions.FirstRowIsHeader && columnName.Length >= 1)
- {
- colHeader = Tab2Sql.Classes.Core.tsvHelper.columnNameAdjustment(columnName, MyOptions.columnNameModel).Trim();
- }
- // this check needed because SQL won't allow two columns of the same name
- addColumn(colHeader, q);
- q++;
- }
-
- if (columnGap > 0)
- {
- for (int i = 0; i < columnGap; i++)
- {
- addColumn("gapColumn", i);
- }
- }
- MyObject.ColCount = Math.Max(MyObject.ColCount, q);
- }
-
- /// <summary>
- /// insert a row into the datatable
- /// </summary>
- /// <param name="Line">Row contents</param>
- private void DoBodyRow (string line)
- {
- int q = 0;
- // column number
- int qColumnNumber = 0;
- // column number ignoring empty columns.
- if (line.Trim ().Length < 2)
- {
- return;
- }
- DataRow dr = Dt.NewRow ();
- DataTypes currentDType = DataTypes.unknown;
- fields = getFields (line);
- int ColCounter = fields.GetLength (0);
- foreach (string colString in fields)
- {
- char[] c = { '"', '#' };
-
- string colValue = colString.Trim ().Trim (c);
- if (colValue.Length > 0)
- {
- if (colValue == "NULL")
- {
- colValue = "";
- }
- currentDType = DTypes.GuessMyType (colValue);
- //MyObject.ValueTypes[qColumnNumber, currentRow] = currentDType;
- qColumnNumber++;
- if (currentDType == DataTypes.integer || currentDType == DataTypes.numeric)
- {
- colValue = DText.CleanNumeric (colValue);
-
- MyObject.Highest[q] = Math.Max (MyObject.Highest[q], Convert.ToDecimal (colValue));
- MyObject.Lowest[q] = Math.Min (MyObject.Lowest[q], Convert.ToDecimal (colValue));
- }
- // Work out precedence. Do we stick with existing best guess or use new one?
- MyObject.ColumnTypes[q] = DTypes.chooseDataType (currentDType, MyObject.ColumnTypes[q]);
- // Is this value longer than those that went before it ?
- MyObject.LongestLine[q] = Math.Max (colString.Trim ().Length, MyObject.LongestLine[q]);
- }
- try
- {
- if (colString != null && colString != "NULL")
- {
- if (colString.Length > 0)
- {
- if (currentDType == DataTypes.integer || currentDType == DataTypes.numeric)
- {
- dr[q] = DGen.DText.CleanValue (colString).Trim ();
- }
- else
- {
- if (dr[q] != null)
- {
- dr[q] = colString.Trim ().Trim (c);
- }
- }
- }
- }
- }
- catch (Exception ex)
- {
- logger.Error (ex.Message);
-
- //TODO:Some sort of error handler
- }
- q++;
- }
- MyObject.ColCount = Math.Max(qColumnNumber, MyObject.ColCount);
- Dt.Rows.Add(dr);
- dr.AcceptChanges();
- }
-
- protected virtual void Dispose(bool disposing)
- {
- if (disposing)
- {
- // dispose managed resources
- Dt.Clear();
- Dt.Dispose();
- }
- // free native resources
- }
-
- public void Dispose()
- {
- Dispose(true);
- GC.SuppressFinalize(this);
- }
-
-
- private string startText;
- private string[] fields;
-
-
-
-
-
- /// <summary>
- /// Get's an array of columns as split by the global separator.
- /// </summary>
- /// <param name="line"></param>
- /// <returns></returns>
- private string[] getFields(string line)
- {
- string[] tmpArray = System.Text.RegularExpressions.Regex.Split(line, GlobalSeparator);
- int items = tmpArray.Length;
- if (tmpArray[items - 1].Length < 1)
- {
- items--;
- }
- line = null;
- return (string[])DGen.DGen.ResizeArray(tmpArray, items);
- }
-
-
- // PRIVATE METHODS (MINOR)
- // ----------------------------------------------------------------------------------------------------
- // columnNameAdjustment
- // (straight forward, makes column names suitable for SQL use - exact handling depends on tab2sql.Options.ColumnNameHandling)
-
- private void addColumn(string colHeader, int q)
- {
-
- if (Dt.Columns.Contains(colHeader) == false)
- {
- Dt.Columns.Add(colHeader);
- }
- else
- {
- do
- {
- colHeader = "[CX" + q.ToString() + "]";
- q++;
- } while (Dt.Columns.Contains(colHeader));
- Dt.Columns.Add(colHeader);
- }
- }
-
- /// <summary>
- /// Possibly redundant (27th October 2009)
- /// </summary>
- /// <param name="column"></param>
- /// <returns></returns>
- private string columnNameAdjustment(string column)
- {
- var columnNameSettings = new HandlingOfColumnNames();
- columnNameSettings = MyOptions.columnNameModel;
- switch (columnNameSettings)
- {
- case HandlingOfColumnNames.NoChanges:
- // foo.
- break;
- case HandlingOfColumnNames.JustAddQuotes:
- column = "[" + column + "]";
- break;
- case HandlingOfColumnNames.Standard:
- column = DGen.DText.FixColumnHeader(column);
-
- break;
- default:
-
- break;
- }
- return column;
- }
- }
- }