/Tools/clsDataTable.cs
http://inferno4proteomics.googlecode.com/ · C# · 741 lines · 617 code · 52 blank · 72 comment · 70 complexity · a44fd3091c3bafac863c39d86e9ae5cd MD5 · raw file
- using System;
- using System.Collections.Generic;
- using System.Collections;
- using System.Text;
- using System.Data;
- using System.Data.OleDb;
- using System.IO;
- using System.Windows.Forms;
- using LumenWorks.Framework.IO.Csv;
- using DAnTE.Tools;
-
-
- namespace DAnTE.Tools
- {
- public static class clsDataTable
- {
- #region File loading methods
- public static DataTable LoadFile2DataTable(string FileName)
- {
- string fileName = FileName;
- string filePath = FileName;
- string fExt;
- String sConnectionString = "";
- DataTable mdtOut = new DataTable();
- DataTable mdtIn = new DataTable();
-
- fileName = Path.GetFileName(FileName);
- filePath = Path.GetDirectoryName(FileName);
- fExt = Path.GetExtension(fileName);
-
- switch (fExt)
- {
- case ".csv":// CSV files
- using (clsGenericParserAdapter parser = new clsGenericParserAdapter())
- {
- parser.SetDataSource(FileName);
- parser.ColumnDelimiter = ",".ToCharArray();
- parser.FirstRowHasHeader = true;
- parser.MaxBufferSize = 4096;
- parser.TextQualifier = '\"';
- mdtIn = parser.GetDataTable();
- parser.Close();
- mdtOut = clsDataTable.ReplaceMissingStr(mdtIn);
- }
- break;
- case ".txt":
- using (clsGenericParserAdapter parser = new clsGenericParserAdapter())
- {
- parser.SetDataSource(FileName);
- parser.ColumnDelimiter = "\t".ToCharArray();
- parser.FirstRowHasHeader = true;
- parser.MaxBufferSize = 4096;
- parser.TextQualifier = '\"';
- mdtIn = parser.GetDataTable();
- parser.Close();
- mdtOut = clsDataTable.ReplaceMissingStr(mdtIn);
- }
- break;
- case ".xls"://Excel files
- sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" +
- FileName + ";" + "Extended Properties=Excel 8.0;";
- goto case "Excel";
- case ".xlsx": // New Excel files
- sConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source=" +
- FileName + ";" + "Extended Properties=Excel 12.0;";
- goto case "Excel";
- case "Excel":
- OleDbConnection objConn = null;
- DataTable dt = null;
- try
- {
- //String sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" +
- // FileName + ";" + "Extended Properties=Excel 8.0;";
- objConn = new OleDbConnection(sConnectionString);
- objConn.Open();
- dt = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
- if (dt == null)
- {
- return null;
- }
- String[] excelSheets = new String[dt.Rows.Count];
- int i = 0;
-
- // Add the sheet name to the string array.
- foreach (DataRow row in dt.Rows)
- {
- excelSheets[i] = row["TABLE_NAME"].ToString();
- i++;
- }
- string sheetCmd = "SELECT * FROM [" + excelSheets[0] +"]"; //read the first table
- OleDbCommand objCmdSelect = new OleDbCommand(sheetCmd, objConn);
- OleDbDataAdapter objAdapter1 = new OleDbDataAdapter();
- objAdapter1.SelectCommand = objCmdSelect;
- objAdapter1.Fill(mdtOut);
- //mdtOut = clsDataTable.ClearNulls(mdtIn) ;
- }
- catch (Exception ex)
- {
- Console.WriteLine(ex.Message.ToString());
- }
- finally
- {
- // Clean up.
- if (objConn != null)
- {
- objConn.Close();
- objConn.Dispose();
- }
- if (dt != null)
- {
- dt.Dispose();
- }
- }
- break;
- default:
- Console.WriteLine("Unknown File");
- //fileOK = false;
- mdtOut = null;
- break;
- }
- return mdtOut;
- }
-
- public static DataTable LoadFile2DataTableJETOLEDB(string FileName)
- {
- string fileName = FileName;
- string filePath = FileName;
- string fExt;
- DataTable mdtOut = new DataTable();
- DataTable mdtIn = new DataTable();
- OleDbConnection objConn = null;
- DataTable dt = null;
-
- fileName = Path.GetFileName(FileName);
- filePath = Path.GetDirectoryName(FileName);
- fExt = Path.GetExtension(fileName);
-
- switch (fExt)
- {
- case ".csv":// CSV files
- case ".txt":
- objConn = null;
- dt = null;
- try
- {
- String sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" +
- "Data Source=" + filePath + ";" +
- @"Extended Properties=""text;HDR=Yes;FMT=Delimited""";
- objConn = new OleDbConnection(sConnectionString);
- objConn.Open();
-
- string sheetCmd = "SELECT * FROM [" + fileName + "]"; //read the table
- OleDbCommand objCmdSelect = new OleDbCommand(sheetCmd, objConn);
- OleDbDataAdapter objAdapter1 = new OleDbDataAdapter();
- objAdapter1.SelectCommand = objCmdSelect;
- objAdapter1.Fill(mdtOut);
- //mdtOut = clsDataTable.ClearNulls(mdtIn) ;
- }
- catch (Exception ex)
- {
- Console.WriteLine(ex.Message.ToString());
- }
- finally
- {
- // Clean up.
- if (objConn != null)
- {
- objConn.Close();
- objConn.Dispose();
- }
- if (dt != null)
- {
- dt.Dispose();
- }
- }
- break;
- case ".xls"://Excel files
- objConn = null;
- dt = null;
- try
- {
- String sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" +
- FileName + ";" + "Extended Properties=Excel 8.0;";
- objConn = new OleDbConnection(sConnectionString);
- objConn.Open();
- dt = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
- if (dt == null)
- {
- return null;
- }
- String[] excelSheets = new String[dt.Rows.Count];
- int i = 0;
-
- // Add the sheet name to the string array.
- foreach (DataRow row in dt.Rows)
- {
- excelSheets[i] = row["TABLE_NAME"].ToString();
- i++;
- }
- string sheetCmd = "SELECT * FROM [" + excelSheets[0] + "]"; //read the first table
- OleDbCommand objCmdSelect = new OleDbCommand(sheetCmd, objConn);
- OleDbDataAdapter objAdapter1 = new OleDbDataAdapter();
- objAdapter1.SelectCommand = objCmdSelect;
- objAdapter1.Fill(mdtOut);
- //mdtOut = clsDataTable.ClearNulls(mdtIn) ;
- }
- catch (Exception ex)
- {
- Console.WriteLine(ex.Message.ToString());
- }
- finally
- {
- // Clean up.
- if (objConn != null)
- {
- objConn.Close();
- objConn.Dispose();
- }
- if (dt != null)
- {
- dt.Dispose();
- }
- }
- break;
- default:
- Console.WriteLine("Unknown File");
- //fileOK = false;
- mdtOut = null;
- break;
- }
- return mdtOut;
- }
-
- public static DataTable LoadFile2DataTableFastCSVReader(string FileName)
- {
- string fileName = FileName;
- string filePath = FileName;
- string fExt;
- String sConnectionString = "";
- DataTable mdtOut = new DataTable();
- DataTable mdtIn = new DataTable();
-
- fileName = Path.GetFileName(FileName);
- filePath = Path.GetDirectoryName(FileName);
- fExt = Path.GetExtension(fileName);
-
- switch (fExt)
- {
- case ".csv":// CSV files
- using (CsvReader csv = new CsvReader(new System.IO.StreamReader(FileName), true))
- {
- csv.MissingFieldAction = MissingFieldAction.ReplaceByEmpty;
- mdtOut.Load(csv);
- }
- break;
- case ".txt":
- using (CsvReader csv = new CsvReader(new System.IO.StreamReader(FileName), true, '\t'))
- {
- csv.MissingFieldAction = MissingFieldAction.ReplaceByEmpty;
- mdtOut.Load(csv);
- }
- break;
- case ".xls"://Excel files
- sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" +
- FileName + ";" + "Extended Properties=Excel 8.0;";
- goto case "Excel";
- case ".xlsx": // New Excel files
- sConnectionString = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source=" +
- FileName + ";" + "Extended Properties=Excel 12.0;";
- goto case "Excel";
- case "Excel":
- OleDbConnection objConn = null;
- DataTable dt = null;
- string mstrSheet = null;
- try
- {
- //String sConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" +
- // FileName + ";" + @"Extended Properties=""Excel 8.0;HDR=Yes;IMEX=1;""";
- objConn = new OleDbConnection(sConnectionString);
- objConn.Open();
- dt = objConn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
- if (dt == null)
- {
- return null;
- }
- if (dt.Rows.Count == 1)
- mstrSheet = (dt.Rows[0])["TABLE_NAME"].ToString();
- else
- {
- ArrayList marrExcelSheets = new ArrayList();
- int i = 0;
-
- // Add the sheet name to the string array.
- foreach (DataRow row in dt.Rows)
- {
- mstrSheet = row["TABLE_NAME"].ToString();
- marrExcelSheets.Add(mstrSheet);
- i++;
- }
- frmSelectExcelSheet mfrmSheets = new frmSelectExcelSheet();
- mfrmSheets.PopulateListBox = marrExcelSheets;
- if (mfrmSheets.ShowDialog() == DialogResult.OK)
- {
- i = mfrmSheets.SelectedSheet;
- mstrSheet = marrExcelSheets[i].ToString();
- }
- else
- {
- mdtOut = null;
- break;
- }
- }
- string sheetCmd = "SELECT * FROM [" + mstrSheet + "]";
- OleDbCommand objCmdSelect = new OleDbCommand(sheetCmd, objConn);
- OleDbDataAdapter objAdapter1 = new OleDbDataAdapter();
- objAdapter1.SelectCommand = objCmdSelect;
- objAdapter1.Fill(mdtOut);
- }
- catch (Exception ex)
- {
- Console.WriteLine(ex.Message.ToString());
- }
- finally
- {
- // Clean up.
- if (objConn != null)
- {
- objConn.Close();
- objConn.Dispose();
- }
- if (dt != null)
- {
- dt.Dispose();
- }
- }
- break;
- default:
- Console.WriteLine("Unknown File");
- //fileOK = false;
- mdtOut = null;
- break;
- }
- return mdtOut;
- }
- #endregion
-
- public static DataTable Array2DataTable(double[,] matrix, string[] rowNames, string[] colHeaders)
- {
- DataTable mDataTable = new DataTable();
- DataColumn mDataColumn;
- DataRow mDataRow;
-
- mDataColumn = new DataColumn();
- mDataColumn.DataType = System.Type.GetType("System.String");
- mDataColumn.ColumnName = "Row_ID";
- //mDataColumn.ReadOnly = true ;
- mDataTable.Columns.Add(mDataColumn);
-
- for (int i = 0; i < colHeaders.Length; i++)
- {
- mDataColumn = new DataColumn();
- mDataColumn.DataType = System.Type.GetType("System.String");
- mDataColumn.ColumnName = colHeaders[i];
- //mDataColumn.ReadOnly = true ;
- mDataTable.Columns.Add(mDataColumn);
- }
- for (int i = 0; i < matrix.GetLength(0); i++)
- {
- mDataRow = mDataTable.NewRow();
- mDataRow[0] = rowNames[i];
- for (int j = 0; j < matrix.GetLength(1); j++)
- {
- mDataRow[j + 1] = matrix[i, j].ToString();
- }
- mDataTable.Rows.Add(mDataRow);
- }
- return ReplaceMissingStr(mDataTable);
- }
-
- public static string[] DataColumn2strArray(DataTable mTab, string Colmn)
- {
- string[] mStrArr = new string[mTab.Rows.Count];
- DataColumnCollection columns = mTab.Columns;
- int i = 0;
- foreach (DataColumn column in columns)
- {
- if (!Colmn.Equals(column.ColumnName))
- i++;
- else
- break;
- }
- for (int row = 0; row < mTab.Rows.Count; row++)
- {
- mStrArr[row] = mTab.Rows[row].ItemArray[i].ToString();
- }
- return mStrArr;
- }
-
- public static DataTable ReplaceMissingStr(DataTable dt)
- {
- string cell = null;
- int Nrows = dt.Rows.Count;
- //DataTable outDtable = new DataTable();
-
- for (int row = 0; row < Nrows; row++)
- {
- //DataRow workRow = dt.Rows[row] ;
- string[] obj;
- obj = new string[dt.Rows[row].ItemArray.Length];
- for (int col = 0; col < dt.Rows[row].ItemArray.Length; col++)
- {
- obj[col] = dt.Rows[row].ItemArray[col].ToString();
- cell = dt.Rows[row].ItemArray[col].ToString();
- if (cell.Equals("999999") || cell.Equals(" 9.999990e+05"))
- {
- string s = "";
- obj[col] = s;
- }
- }
- //outDtable.Rows.Add(obj);
- dt.Rows[row].ItemArray = obj;
- }
- dt.AcceptChanges();
- return dt;
- }
-
- public static DataTable ReplaceMissing(DataTable dt)
- {
- string cell = null;
- int Nrows = dt.Rows.Count;
- //DataTable outDtable = new DataTable();
-
- for (int row = 0; row < Nrows; row++)
- {
- //DataRow workRow = dt.Rows[row] ;
- Object[] obj = new object[dt.Rows[row].ItemArray.Length];
- //obj = new string[dt.Rows[row].ItemArray.Length];
- for (int col = 0; col < dt.Rows[row].ItemArray.Length; col++)
- {
- obj[col] = dt.Rows[row].ItemArray[col];
- cell = dt.Rows[row].ItemArray[col].ToString();
- if (cell.Equals("999999") || cell.Equals(" 9.999990e+05"))
- {
- obj[col] = System.DBNull.Value;
- }
- }
- //outDtable.Rows.Add(obj);
- dt.Rows[row].ItemArray = obj;
- }
- dt.AcceptChanges();
- return dt;
- }
-
- public static DataTable ClearNulls(DataTable dt)
- {
- int Nrows = dt.Rows.Count;
- DataTable outDtable = new DataTable();
-
- for (int row = 0; row < Nrows; row++)
- {
- //DataRow workRow = dt.Rows[row] ;
- object[] obj;
- obj = new object[dt.Rows[row].ItemArray.Length];
- for (int col = 0; col < dt.Rows[row].ItemArray.Length; col++)
- {
- obj[col] = dt.Rows[row].ItemArray[col];
- if (dt.Rows[row].ItemArray[col] == System.DBNull.Value)
- {
- string s = "";
- obj[col] = (object)s;
- }
- }
- dt.Rows[row].ItemArray = obj;
- }
- dt.AcceptChanges();
- return dt;
- }
-
- public static void RemoveDuplicateRows(DataTable dTable, string colName)
- {
- Hashtable hTable = new Hashtable();
- ArrayList duplicateList = new ArrayList();
-
- foreach (DataRow drow in dTable.Rows)
- {
- try
- {
- hTable.Add(drow[colName], string.Empty);
- }
- catch
- {
- duplicateList.Add(drow);
- }
- }
-
- foreach (DataRow dRow in duplicateList)
- dTable.Rows.Remove(dRow);
- }
-
- public static DataTable RemoveDuplicateRows2(DataTable dTable, string colName)
- {
- Hashtable hTable = new Hashtable();
- DataRow prevRow;
- ArrayList duplicateList = new ArrayList();
-
- foreach (DataColumn dC in dTable.Columns)
- {
- dC.ReadOnly = false;
- }
-
- foreach (DataRow thisRow in dTable.Rows)
- {
- if (!ValidRow(thisRow))
- duplicateList.Add(thisRow);
- else
- try
- {
- hTable.Add(thisRow[colName], thisRow);
- }
- catch
- {
- duplicateList.Add(thisRow);
- prevRow = (DataRow)hTable[thisRow[colName]];
- if (!RowsIdentical(thisRow, prevRow))
- {
- DataRow currentRow = addRows(prevRow, thisRow);
- hTable[thisRow[colName]] = currentRow;
- dTable.AcceptChanges();
- }
- }
- }
-
- foreach (DataRow dRow in duplicateList)
- dTable.Rows.Remove(dRow);
-
- return dTable;
- }
-
- public static bool RowsIdentical(DataRow row1, DataRow row2)
- {
- bool success = true;
- for (int i = 0; i < row1.ItemArray.Length; i++)
- {
- if (!(row1.ItemArray[i].Equals(row2.ItemArray[i])))
- {
- success = false;
- break;
- }
-
- }
- return success;
- }
-
- public static bool ValidRow(DataRow row) // not all empty
- {
- bool success = false;
- for (int i = 1; i < row.ItemArray.Length; i++)
- {
- if (!(row.ItemArray[i].Equals("")))
- {
- success = true;
- break;
- }
-
- }
- return success;
- }
-
- public static DataRow addRows(DataRow row1, DataRow row2)
- {
- Double val1, val2, val3;
- for (int i = 1; i < row1.ItemArray.Length; i++)
- {
- val1 = row1.ItemArray[i].Equals("") ? 0 : Convert.ToDouble(row1.ItemArray[i]);
- val2 = row2.ItemArray[i].Equals("") ? 0 : Convert.ToDouble(row2.ItemArray[i]);
- val3 = val1 + val2;
- if (Math.Abs(val3) > Double.Epsilon)
- row1[i] = val3.ToString();
- }
- return row1;
- }
-
- //----------------------------------------------------------------------------
- /// <summary>
- /// Removes duplicate rows from given DataTable
- /// </summary>
- /// <param name="tbl">Table to scan for duplicate rows</param>
- /// <param name="KeyColumns">An array of DataColumns
- /// containing the columns to match for duplicates</param>
- public static void RemoveDuplicates(DataTable tbl, DataColumn[] keyColumns)
- {
- int rowNdx = 0;
- while (rowNdx < tbl.Rows.Count - 1)
- {
- DataRow[] dups = FindDups(tbl, rowNdx, keyColumns);
- if (dups.Length > 0)
- {
- foreach (DataRow dup in dups)
- {
- tbl.Rows.Remove(dup);
- }
- }
- else
- {
- rowNdx++;
- }
- }
- }
-
- private static DataRow[] FindDups(DataTable tbl, int sourceNdx, DataColumn[] keyColumns)
- {
- ArrayList retVal = new ArrayList();
- DataRow sourceRow = tbl.Rows[sourceNdx];
- for (int i = sourceNdx + 1; i < tbl.Rows.Count; i++)
- {
- DataRow targetRow = tbl.Rows[i];
- if (IsDup(sourceRow, targetRow, keyColumns) || Is1stColumnEmpty(targetRow))
- {
- retVal.Add(targetRow);
- }
- }
- return (DataRow[])retVal.ToArray(typeof(DataRow));
- }
-
-
-
- private static bool IsDup(DataRow sourceRow, DataRow targetRow, DataColumn[] keyColumns)
- {
- bool retVal = true;
- foreach (DataColumn column in keyColumns)
- {
- retVal = retVal && sourceRow[column].Equals(targetRow[column]);
- if (!retVal) break;
- }
- return retVal;
- }
-
- private static bool Is1stColumnEmpty(DataRow sourceRow)
- {
- if (sourceRow[0].ToString().Equals(""))
- return true;
- else
- return false;
- }
-
- //private static bool Is2ndColumnEmpty(DataRow sourceRow)
- //{
- // if (sourceRow[1].ToString().Equals(""))
- // return true;
- // else
- // return false;
- //}
-
- //public static void RemoveProtEmpty(DataTable tbl)
- //{
- // DataRow[] blankRs = FindEmpty(tbl);
- // if (blankRs.Length > 0)
- // {
- // foreach (DataRow dup in blankRs)
- // {
- // tbl.Rows.Remove(dup);
- // }
- // }
- //}
-
- //private static DataRow[] FindEmpty(DataTable tbl)
- //{
- // ArrayList retVal = new ArrayList();
- // for (int i = 0; i < tbl.Rows.Count; i++)
- // {
- // DataRow targetRow = tbl.Rows[i];
- // if (Is1stColumnEmpty(targetRow) || Is2ndColumnEmpty(targetRow))
- // {
- // retVal.Add(targetRow);
- // }
- // }
- // return (DataRow[])retVal.ToArray(typeof(DataRow));
- //}
-
- /// <summary>
- /// Get the DataTable column names to an arraylist
- /// </summary>
- public static ArrayList DataTableColumns(DataTable dt, bool dataonly)
- {
- ArrayList marrCols = new ArrayList();
- int i = 0;
- foreach (DataColumn column in dt.Columns)
- {
- if (dataonly)
- {
- //Ignore MassTag column
- //if (i != 0 || !column.ColumnName.Equals("PepCount"))
- if (i != 0)
- marrCols.Add(column.ColumnName);
- i++;
- }
- else
- marrCols.Add(column.ColumnName);
- }
- return marrCols;
- }
-
- public static ArrayList DataTableColumns(DataTable dt, string dataset)
- {
- ArrayList marrCols = new ArrayList();
- int i = 0;
- bool prots = (dataset.Contains("pData") || dataset.Contains("qrollup"));
-
- foreach (DataColumn column in dt.Columns)
- {
- if (prots)
- {
- //Ignore first two columns
- if (i > 2)
- marrCols.Add(column.ColumnName);
- i++;
- }
- else
- {
- //Ignore the first column
- if (i != 0)
- marrCols.Add(column.ColumnName);
- i++;
- }
- }
- return marrCols;
- }
-
- public static ArrayList DataTableRows(DataTable dt)
- {
- ArrayList marrRows = new ArrayList();
-
- foreach (DataRow dRow in dt.Rows)
- {
- marrRows.Add(dRow.ItemArray[0].ToString());
- }
- return marrRows;
- }
-
- }
- }