/DataParser/DataParser/DataParser.cs
C# | 389 lines | 235 code | 59 blank | 95 comment | 20 complexity | 8e58c6daba29e41d92b345b3093d6713 MD5 | raw file
- using System;
- using System.Collections;
- using System.Collections.Generic;
- using System.Text;
- using System.Xml;
- using System.Data;
-
- namespace DataParser
- {
- /// <summary>
- /// A simple data binder from raw data.
- /// </summary>
- public class DataParser
- {
- #region VARIABLES
-
/// <summary>
/// Path constant for a data-model file.
/// NOTE(review): not referenced anywhere in this file; presumably kept for derived classes - confirm.
/// </summary>
protected const string DATA_MODEL_PATH = "data.models.xml";

/// <summary>
/// Path of the XML file that readMetaData loads.
/// </summary>
protected const string META_DATA_PATH = "metadata.xml";

/// <summary>
/// Characters used to split the keyword list of an attribute in the metadata file.
/// </summary>
protected char[] separatorChars = new char[] { ',' };

/// <summary>
/// Set to true by readMetaData once at least one attribute has been loaded.
/// </summary>
protected bool isReadMetadata;

/// <summary>
/// Format of one parsed field: the extracted value immediately followed by the separator.
/// </summary>
protected const string OUTPUT_FORMAT = "{0}{1}";

/// <summary>
/// Maps each attribute name to a Hashtable holding that attribute's keywords.
/// </summary>
protected Hashtable dataModels;

/// <summary>
/// Holds the filter strings that are stripped from every raw line before keyword matching.
/// </summary>
protected Hashtable dataFilter;

/// <summary>
/// Whether keywords and filters are matched case-sensitively.
/// </summary>
protected bool isMatchCase;

/// <summary>
/// The items extracted by the current (most recent) ParseData call.
/// </summary>
protected List<DataItem> parsedDataItems;

/// <summary>
/// Table that accumulates one row per ParseData call.
/// </summary>
protected DataTable dataTable;

/// <summary>
/// Gets the table that accumulates the parsed rows.
/// It stays null until initDataTable has created it.
/// </summary>
public DataTable ParsedDataTable
{
    get
    {
        return dataTable;
    }
}

/// <summary>
/// Gets the attribute names together with their associated keyword tables.
/// </summary>
public Hashtable DataModels
{
    get
    {
        return dataModels;
    }
}

/// <summary>
/// Gets or sets whether keywords and filters are matched case-sensitively.
/// The constructor sets this to true.
/// </summary>
public bool MatchCase
{
    get
    {
        return isMatchCase;
    }
    set
    {
        isMatchCase = value;
    }
}

/// <summary>
/// Backing field of <see cref="Separator"/>.
/// </summary>
protected string separator;

/// <summary>
/// Gets or sets the separator appended after each parsed field.
/// ParseData overwrites it with its own separator argument.
/// </summary>
public string Separator
{
    get
    {
        return separator;
    }
    set
    {
        separator = value;
    }
}

/// <summary>
/// Backing field of <see cref="ColumnsOrder"/>.
/// </summary>
protected string[] columnsOrder;

/// <summary>
/// Gets the attribute names in the order they were found during the latest parse.
/// It is null until addToDataTable has run for the first time.
/// </summary>
public string[] ColumnsOrder
{
    get
    {
        return columnsOrder;
    }
}
-
- #endregion
-
- #region CONSTRUCTOR
-
/// <summary>
/// Initializes a new instance of DataParser with the default settings:
/// empty model and filter tables, case-sensitive matching, an empty separator
/// and no recorded column order.
/// The instance cannot parse data until Init() has loaded the metadata.
/// </summary>
public DataParser()
{
    this.dataModels = new Hashtable();
    this.dataFilter = new Hashtable();

    // Start with an empty list so the protected helpers that read or append to it
    // never see a null reference when they run before the first ParseData call.
    this.parsedDataItems = new List<DataItem>();

    this.isReadMetadata = false;
    this.isMatchCase = true;
    this.separator = String.Empty;
    this.columnsOrder = null;
}
-
- #endregion
-
- #region CORE METHODS
-
/// <summary>
/// Loads the attribute/keyword models and the data filters from the metadata XML file
/// (META_DATA_PATH) into dataModels and dataFilter.
/// </summary>
/// <remarks>
/// Only the first data-model element of the document is read. Each attribute's keyword
/// text is split on separatorChars and every token is trimmed. Repeated keywords,
/// attributes or filters overwrite the earlier entry instead of throwing, so the method
/// can also be run more than once on the same instance.
/// </remarks>
/// <exception cref="XmlException">
/// The file is not well-formed XML, or it contains no data-model element.
/// </exception>
protected void readMetaData()
{
    XmlDocument xmldoc = new XmlDocument();
    xmldoc.Load(META_DATA_PATH);

    //get root node
    XmlElement elemRoot = xmldoc.DocumentElement;

    #region DATA MODELS

    XmlNodeList elemModelList = elemRoot.GetElementsByTagName(XmlConst.DATA_MODEL);

    // XmlNodeList returns null for an out-of-range index; fail with a descriptive
    // error instead of a NullReferenceException further down.
    XmlElement elemModel = elemModelList.Count > 0 ? elemModelList[0] as XmlElement : null;
    if (elemModel == null)
    {
        throw new XmlException(String.Format(
            "Metadata file '{0}' does not contain a <{1}> element.",
            META_DATA_PATH, XmlConst.DATA_MODEL));
    }

    XmlNodeList elemAttribList = elemModel.GetElementsByTagName(XmlConst.ATTRIB);

    for (int i = 0; i < elemAttribList.Count; i++)
    {
        XmlElement elemAttrib = (XmlElement)elemAttribList[i];
        string attribName = elemAttrib.GetAttribute(XmlConst.ATTRIB_ID);

        //Get child nodes <keywords>; a missing node is treated like an empty one.
        XmlNodeList elemKeys = elemAttrib.GetElementsByTagName(XmlConst.KEYWORDS);
        string strKeywords = elemKeys.Count > 0 ? elemKeys[0].InnerText : String.Empty;

        //Split the keywords and put them into hash table.
        //Split never returns null or an empty array, so no further check is needed.
        string[] keyWordsArray = strKeywords.Split(separatorChars, StringSplitOptions.None);

        Hashtable keywords = new Hashtable();
        for (int n = 0; n < keyWordsArray.Length; n++)
        {
            string strTemp = keyWordsArray[n].Trim();

            // Indexer instead of Add: a repeated token (including two empty ones
            // produced by "a,,b,") must not raise ArgumentException.
            keywords[strTemp] = strTemp;
        }

        //Add the attribute and its keywords to the hash table (last definition wins).
        this.dataModels[attribName] = keywords;

        this.isReadMetadata = true;
    }

    #endregion

    #region DATA FILTER

    XmlNodeList elemFilterList = elemRoot.GetElementsByTagName(XmlConst.DATA_FILTER);
    for (int i = 0; i < elemFilterList.Count; i++)
    {
        string filterText = elemFilterList[i].InnerText;

        // An empty filter can never be removed from a string: String.Replace rejects
        // it and a "remove while contained" loop would never terminate.
        if (String.IsNullOrEmpty(filterText))
            continue;

        this.dataFilter[filterText] = filterText;
    }

    #endregion
}
-
/// <summary>
/// Creates a fresh data table with one column per loaded attribute.
/// Does nothing while no metadata has been read.
/// </summary>
protected void initDataTable()
{
    // Columns come from the data models, so there is nothing to build before they are loaded.
    if (isReadMetadata)
    {
        dataTable = new DataTable();
        DataColumnCollection columns = dataTable.Columns;

        foreach (string attributeName in dataModels.Keys)
        {
            columns.Add(attributeName);
        }
    }
}
-
/// <summary>
/// Strips the configured filters from one raw line, then looks for the first attribute
/// that has a keyword occurring in the line and extracts that field.
/// </summary>
/// <param name="rawData">One line of the original data.</param>
/// <returns>
/// The extracted text followed by the current separator, or an empty string when the
/// line is null/empty or no keyword produced a non-empty result.
/// </returns>
/// <remarks>
/// On a match a DataItem (attribute name, result) is appended to parsedDataItems.
/// Matching is ordinal; with MatchCase switched off it is ordinal and case-insensitive.
/// NOTE(review): the two modes extract differently, as the original code did - the
/// case-sensitive mode returns the text between the first and the next occurrence of the
/// keyword (dropping anything before it), the case-insensitive mode returns the whole
/// trimmed line with the first occurrence of the keyword cut out. Confirm which is intended.
/// </remarks>
protected string searchAndGet(string rawData)
{
    if (String.IsNullOrEmpty(rawData))
        return String.Empty;

    if (this.parsedDataItems == null)
        this.parsedDataItems = new List<DataItem>();

    StringComparison comparison = this.isMatchCase
        ? StringComparison.Ordinal
        : StringComparison.OrdinalIgnoreCase;

    // Filter removal does not depend on the attribute or keyword, so do it once up front.
    foreach (string filter in this.dataFilter.Values)
    {
        // An empty filter would make Replace throw and the removal loop spin forever.
        if (String.IsNullOrEmpty(filter))
            continue;

        if (this.isMatchCase)
            rawData = rawData.Replace(filter, String.Empty);
        else
            rawData = removeAllIgnoreCase(rawData, filter);
    }

    // Search and cut on the same (trimmed) string so the match index is always valid for it.
    string trimmedData = rawData.Trim();
    string strResult = String.Empty;

    foreach (string strAttrib in this.dataModels.Keys)
    {
        Hashtable keywords = this.dataModels[strAttrib] as Hashtable;
        if (keywords == null)
            continue;

        foreach (string strKeyword in keywords.Keys)
        {
            if (String.IsNullOrEmpty(strKeyword))
                continue;

            int indexOfKey = trimmedData.IndexOf(strKeyword, comparison);
            if (indexOfKey < 0)
                continue;

            string value;
            if (this.isMatchCase)
            {
                // The keyword is known to occur, so the split has at least two parts.
                string[] content = trimmedData.Split(new string[] { strKeyword }, StringSplitOptions.None);
                value = content[1];
            }
            else
            {
                value = trimmedData.Remove(indexOfKey, strKeyword.Length);
            }

            strResult = String.Format(OUTPUT_FORMAT, value, this.separator);
            break;
        }

        // An empty result (empty value and empty separator) counts as "not found",
        // so the remaining attributes are still tried.
        if (!String.IsNullOrEmpty(strResult))
        {
            this.parsedDataItems.Add(new DataItem(strAttrib, strResult));
            break;
        }
    }

    return strResult;
}

/// <summary>
/// Removes every occurrence of token from text, ignoring case, rescanning from the start
/// after each removal until none is left.
/// </summary>
private static string removeAllIgnoreCase(string text, string token)
{
    int index = text.IndexOf(token, StringComparison.OrdinalIgnoreCase);
    while (index >= 0)
    {
        text = text.Remove(index, token.Length);
        index = text.IndexOf(token, StringComparison.OrdinalIgnoreCase);
    }
    return text;
}
-
/// <summary>
/// Appends the items of the current parse as one new row of the data table and records
/// the order in which their attributes were found in columnsOrder.
/// </summary>
/// <remarks>
/// Each stored value has the current separator removed again, because searchAndGet
/// appends it to every item. NOTE(review): Replace removes every occurrence of the
/// separator, including ones that belong to the data itself - confirm that is intended.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The data table does not exist yet (Init() was not called or loaded no attributes).
/// </exception>
protected void addToDataTable()
{
    if (this.dataTable == null)
    {
        throw new InvalidOperationException(
            "The data table has not been initialized; call Init() with metadata that defines at least one attribute before parsing data.");
    }

    List<DataItem> items = this.parsedDataItems ?? new List<DataItem>();

    // String.Replace rejects a null or empty search string, and an empty separator was
    // never appended in any visible way, so there is nothing to strip in that case.
    bool hasSeparator = !String.IsNullOrEmpty(this.separator);

    DataRow row = this.dataTable.NewRow();
    this.columnsOrder = new string[items.Count];

    for (int i = 0; i < items.Count; i++)
    {
        DataItem item = items[i];

        row[item.Key] = hasSeparator
            ? item.Value.Replace(this.separator, String.Empty)
            : item.Value;
        this.columnsOrder[i] = item.Key;
    }

    this.dataTable.Rows.Add(row);
}
-
/// <summary>
/// Prepares the parser for use: loads the metadata, then builds the data table from it.
/// Any exception raised by either step propagates to the caller unchanged.
/// </summary>
public void Init()
{
    readMetaData();
    initDataTable();
}
-
/// <summary>
/// Parses raw data lines into structured data.
/// </summary>
/// <param name="strInputData">The raw lines; null or empty entries are skipped.</param>
/// <param name="separator">Text appended after every extracted field; also stored in Separator.</param>
/// <returns>The concatenation of all extracted fields, each followed by the separator.</returns>
/// <remarks>
/// Every call starts a new list of parsed items and, once all lines are processed,
/// adds exactly one row to the data table.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="strInputData"/> is null.</exception>
public string ParseData(string[] strInputData, string separator)
{
    if (strInputData == null)
        throw new ArgumentNullException("strInputData");

    this.parsedDataItems = new List<DataItem>();

    //Important: the separator must be in place before any line is parsed,
    //because searchAndGet appends it to every field it extracts.
    this.separator = separator;

    StringBuilder outputData = new StringBuilder();

    foreach (string line in strInputData)
    {
        //Skip all empty lines.
        if (String.IsNullOrEmpty(line))
            continue;

        outputData.Append(this.searchAndGet(line));
    }

    this.addToDataTable();
    return outputData.ToString();
}
-
- #endregion
-
/// <summary>
/// A single key/value pair: one field of the table of data.
/// </summary>
public class DataItem
{
    /// <summary>
    /// Identifies the item (its unique ID or key).
    /// </summary>
    public string Key;

    /// <summary>
    /// The content held by the item.
    /// </summary>
    public string Value;

    /// <summary>
    /// Creates an item whose key and value are both the empty string.
    /// </summary>
    public DataItem()
        : this(String.Empty, String.Empty)
    {
    }

    /// <summary>
    /// Creates an item from a key and the value associated with it.
    /// </summary>
    /// <param name="key">Identifies the item (its unique ID or key).</param>
    /// <param name="value">The content held by the item.</param>
    public DataItem(string key, string value)
    {
        Key = key;
        Value = value;
    }
}
- }
- }