PageRenderTime 51ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/DataParser/DataParser/DataParser.cs

http://kidvn-lab.googlecode.com/
C# | 389 lines | 235 code | 59 blank | 95 comment | 20 complexity | 8e58c6daba29e41d92b345b3093d6713 MD5 | raw file
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using System.Xml;
  6. using System.Data;
  7. namespace DataParser
  8. {
  9. /// <summary>
  10. /// A simple data binder from raw data.
  11. /// </summary>
  12. public class DataParser
  13. {
  14. #region VARIABLES
  // ---- Constants ----------------------------------------------------------

  /// <summary>
  /// File name of the data-model definitions. Not referenced by the code visible in this class.
  /// </summary>
  protected const string DATA_MODEL_PATH = "data.models.xml";

  /// <summary>
  /// Path of the XML metadata file that readMetaData() loads.
  /// </summary>
  protected const string META_DATA_PATH = "metadata.xml";

  /// <summary>
  /// Composite format of one parsed field: {0} is the value, {1} is the separator.
  /// </summary>
  protected const string OUTPUT_FORMAT = "{0}{1}";

  // ---- State --------------------------------------------------------------

  /// <summary>
  /// Characters on which the keyword list of an attribute is split.
  /// </summary>
  protected char[] separatorChars = new char[] { ',' };

  /// <summary>
  /// True once at least one attribute has been loaded from the metadata.
  /// </summary>
  protected bool isReadMetadata;

  /// <summary>
  /// Data models: attribute id mapped to the table of its keywords.
  /// </summary>
  protected Hashtable dataModels;

  /// <summary>
  /// Filter strings that are removed from the raw data before keywords are searched.
  /// </summary>
  protected Hashtable dataFilter;

  /// <summary>
  /// Whether keywords and filters are matched case-sensitively.
  /// </summary>
  protected bool isMatchCase;

  /// <summary>
  /// Items produced by the current parse run.
  /// </summary>
  protected List<DataItem> parsedDataItems;

  /// <summary>
  /// Table that receives one row per parse run.
  /// </summary>
  protected DataTable dataTable;

  /// <summary>
  /// Text appended after every parsed field.
  /// </summary>
  protected string separator;

  /// <summary>
  /// Attribute ids of the most recently parsed items, in the order they were parsed.
  /// </summary>
  protected string[] columnsOrder;

  // ---- Properties ---------------------------------------------------------

  /// <summary>
  /// Gets the table of data parsed from the raw data.
  /// </summary>
  public DataTable ParsedDataTable
  {
      get
      {
          return dataTable;
      }
  }

  /// <summary>
  /// Gets the collection of attributes and their associated keywords.
  /// </summary>
  public Hashtable DataModels
  {
      get
      {
          return dataModels;
      }
  }

  /// <summary>
  /// Gets or sets whether keywords and filters are matched case-sensitively.
  /// The default value is TRUE.
  /// </summary>
  public bool MatchCase
  {
      get
      {
          return isMatchCase;
      }
      set
      {
          isMatchCase = value;
      }
  }

  /// <summary>
  /// Gets or sets the separator placed between data fields.
  /// </summary>
  public string Separator
  {
      get
      {
          return separator;
      }
      set
      {
          separator = value;
      }
  }

  /// <summary>
  /// Gets the latest order of the data fields parsed from the raw data.
  /// </summary>
  public string[] ColumnsOrder
  {
      get
      {
          return columnsOrder;
      }
  }
  80. #endregion
  81. #region CONSTRUCTOR
  /// <summary>
  /// Creates a parser with empty model and filter tables, case-sensitive matching,
  /// an empty separator and no recorded column order.
  /// Init() must be called afterwards so that the metadata is loaded and the data table exists
  /// before any data is parsed.
  /// </summary>
  public DataParser()
  {
      // Matching options.
      isMatchCase = true;
      separator = String.Empty;

      // Nothing has been loaded or parsed yet.
      isReadMetadata = false;
      columnsOrder = null;
      dataFilter = new Hashtable();
      dataModels = new Hashtable();
  }
  95. #endregion
  96. #region CORE METHODS
  /// <summary>
  /// Loads the attribute/keyword models and the data filters from the metadata XML file.
  /// </summary>
  /// <remarks>
  /// Every attribute element under the first data-model element is stored in <c>dataModels</c>
  /// as attribute id mapped to a table of its trimmed keywords (the keyword text is split on
  /// <c>separatorChars</c>). The inner text of every data-filter element is stored in
  /// <c>dataFilter</c>.
  /// Entries are written through the Hashtable indexer, so a repeated keyword, attribute or
  /// filter, or a second call to this method, overwrites the existing entry instead of throwing.
  /// A missing data-model element, or an attribute without a keywords element, is skipped.
  /// <c>isReadMetadata</c> becomes true once at least one attribute has been stored.
  /// Exceptions raised while loading the XML file propagate to the caller unchanged.
  /// </remarks>
  protected void readMetaData()
  {
      XmlDocument xmldoc = new XmlDocument();
      xmldoc.Load(META_DATA_PATH);
      XmlElement elemRoot = xmldoc.DocumentElement;

      // Data models: only the first data-model element is read.
      XmlNodeList elemModelList = elemRoot.GetElementsByTagName(XmlConst.DATA_MODEL);
      XmlElement elemModel = elemModelList.Count > 0 ? elemModelList[0] as XmlElement : null;
      if (elemModel != null)
      {
          foreach (XmlElement elemAttrib in elemModel.GetElementsByTagName(XmlConst.ATTRIB))
          {
              string attribName = elemAttrib.GetAttribute(XmlConst.ATTRIB_ID);

              XmlNodeList elemKeys = elemAttrib.GetElementsByTagName(XmlConst.KEYWORDS);
              if (elemKeys.Count == 0)
              {
                  // No keywords element: nothing could ever match this attribute.
                  continue;
              }

              // Split the keyword list and index every trimmed keyword by itself.
              Hashtable keywords = new Hashtable();
              foreach (string rawKeyword in elemKeys[0].InnerText.Split(separatorChars, StringSplitOptions.None))
              {
                  string keyword = rawKeyword.Trim();
                  // Indexer instead of Add: a keyword listed twice must not abort the load.
                  keywords[keyword] = keyword;
              }

              this.dataModels[attribName] = keywords;
              this.isReadMetadata = true;
          }
      }

      // Data filters: each filter string is stored as both key and value.
      foreach (XmlNode elemFilter in elemRoot.GetElementsByTagName(XmlConst.DATA_FILTER))
      {
          string filterText = elemFilter.InnerText;
          this.dataFilter[filterText] = filterText;
      }
  }
  /// <summary>
  /// Creates a new data table with one column per attribute in the data models.
  /// Does nothing while the metadata has not been read.
  /// </summary>
  protected void initDataTable()
  {
      // The columns come from the data models, so there is nothing to build before they are loaded.
      if (isReadMetadata)
      {
          dataTable = new DataTable();
          DataColumnCollection columns = dataTable.Columns;
          foreach (string attributeName in dataModels.Keys)
          {
              columns.Add(attributeName);
          }
      }
  }
  /// <summary>
  /// Finds the first configured keyword that occurs in a line of raw data and extracts the
  /// field value from that line.
  /// </summary>
  /// <remarks>
  /// All configured filter strings are removed from the line first and the result is trimmed.
  /// The attributes in <c>dataModels</c> are then tried in turn. For the first keyword that
  /// occurs in the line and yields a non-empty result, a <see cref="DataItem"/> holding the
  /// attribute id and the result is appended to <c>parsedDataItems</c> and the search stops.
  /// With case-sensitive matching the value is the text that follows the keyword (up to the
  /// next occurrence of that keyword); otherwise it is the trimmed line with the first
  /// occurrence of the keyword removed. Case-insensitive matching is ordinal
  /// (<see cref="StringComparison.OrdinalIgnoreCase"/>), independent of the current culture.
  /// </remarks>
  /// <param name="rawData">One non-null line of the original data.</param>
  /// <returns>
  /// The extracted value followed by the current separator, or an empty string when no keyword matched.
  /// </returns>
  protected string searchAndGet(string rawData)
  {
      // The filters do not depend on the attribute or keyword under test, so strip them once.
      string line = this.removeFilters(rawData).Trim();
      string strResult = String.Empty;

      foreach (string strAttrib in this.dataModels.Keys)
      {
          Hashtable keywords = this.dataModels[strAttrib] as Hashtable;
          if (keywords == null)
          {
              // DataModels is publicly reachable; ignore entries that are not keyword tables.
              continue;
          }

          foreach (string strKeyword in keywords.Keys)
          {
              if (String.IsNullOrEmpty(strKeyword))
              {
                  continue;
              }

              string value = this.extractValue(line, strKeyword);
              if (value == null)
              {
                  continue;
              }

              strResult = String.Format(OUTPUT_FORMAT, value, this.separator);
              break;
          }

          if (!String.IsNullOrEmpty(strResult))
          {
              this.parsedDataItems.Add(new DataItem(strAttrib, strResult));
              break;
          }
      }

      return strResult;
  }

  // Removes every configured filter string from the text, honouring the case-sensitivity setting.
  private string removeFilters(string text)
  {
      foreach (string filter in this.dataFilter.Values)
      {
          // An empty filter would make Replace throw and the removal loop below never terminate.
          if (String.IsNullOrEmpty(filter))
          {
              continue;
          }

          if (this.isMatchCase)
          {
              text = text.Replace(filter, String.Empty);
          }
          else
          {
              // Replace has no case-insensitive form here, so remove the occurrences one by one.
              int index = text.IndexOf(filter, StringComparison.OrdinalIgnoreCase);
              while (index >= 0)
              {
                  text = text.Remove(index, filter.Length);
                  index = text.IndexOf(filter, StringComparison.OrdinalIgnoreCase);
              }
          }
      }

      return text;
  }

  // Returns the field value for the keyword in the already trimmed line, or null when the keyword is absent.
  private string extractValue(string line, string keyword)
  {
      if (this.isMatchCase)
      {
          if (line.IndexOf(keyword, StringComparison.Ordinal) < 0)
          {
              return null;
          }

          // The keyword is present, so the split always yields at least two parts.
          string[] parts = line.Split(new string[] { keyword }, StringSplitOptions.None);
          return parts[1];
      }

      // The index is taken from the same string that is edited, so leading whitespace cannot shift it.
      int keywordIndex = line.IndexOf(keyword, StringComparison.OrdinalIgnoreCase);
      if (keywordIndex < 0)
      {
          return null;
      }

      return line.Remove(keywordIndex, keyword.Length);
  }
  /// <summary>
  /// Appends one row to the data table holding the items of the current parse run and records
  /// their attribute ids, in parse order, in <c>columnsOrder</c>.
  /// </summary>
  /// <remarks>
  /// Each item value is written to the column named after the item key, with every occurrence
  /// of the current separator removed. When the separator is null or empty the value is stored
  /// unchanged, because String.Replace does not accept a null or empty search string.
  /// The data table must already exist (it is created by initDataTable()).
  /// </remarks>
  protected void addToDataTable()
  {
      DataRow row = this.dataTable.NewRow();
      this.columnsOrder = new string[this.parsedDataItems.Count];

      bool hasSeparator = !String.IsNullOrEmpty(this.separator);
      for (int i = 0; i < this.parsedDataItems.Count; i++)
      {
          DataItem item = this.parsedDataItems[i];
          row[item.Key] = hasSeparator
              ? item.Value.Replace(this.separator, String.Empty)
              : item.Value;
          this.columnsOrder[i] = item.Key;
      }

      this.dataTable.Rows.Add(row);
  }
  /// <summary>
  /// Prepares the parser for parsing: reads the metadata, then builds the data table from it.
  /// </summary>
  /// <remarks>
  /// Exceptions raised while reading the metadata propagate to the caller unchanged.
  /// </remarks>
  public void Init()
  {
      // Order matters: the table columns are derived from the models loaded by readMetaData().
      this.readMetaData();
      this.initDataTable();
  }
  /// <summary>
  /// Parses lines of raw data into structured fields and appends them as one row to the data table.
  /// </summary>
  /// <remarks>
  /// Null and empty lines are skipped. Every other line is passed to searchAndGet(), and the
  /// items collected during the run are then written to the data table by addToDataTable().
  /// The data table is only created by Init(), so Init() has to be called first.
  /// </remarks>
  /// <param name="strInputData">The raw data, one entry per line.</param>
  /// <param name="separator">The text appended after every parsed field; also stored in <see cref="Separator"/>.</param>
  /// <returns>The concatenation of all parsed fields, each followed by the separator.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="strInputData"/> is null.</exception>
  public string ParseData(string[] strInputData, string separator)
  {
      if (strInputData == null)
      {
          throw new ArgumentNullException("strInputData");
      }

      StringBuilder outputData = new StringBuilder();
      this.parsedDataItems = new List<DataItem>();

      // searchAndGet() and addToDataTable() both read the separator, so set it before parsing starts.
      this.separator = separator;

      foreach (string line in strInputData)
      {
          if (String.IsNullOrEmpty(line))
          {
              continue;
          }

          outputData.Append(this.searchAndGet(line));
      }

      this.addToDataTable();
      return outputData.ToString();
  }
  296. #endregion
  /// <summary>
  /// One field of the data table: a key paired with the value stored under it.
  /// </summary>
  public class DataItem
  {
      /// <summary>
      /// The unique ID or key of this data item.
      /// </summary>
      public string Key;

      /// <summary>
      /// The value held by this data item.
      /// </summary>
      public string Value;

      /// <summary>
      /// Creates an item whose key and value are both the empty string.
      /// </summary>
      public DataItem()
          : this(String.Empty, String.Empty)
      {
      }

      /// <summary>
      /// Creates an item from a key and the value associated with it.
      /// </summary>
      /// <param name="key">The unique ID or key of the item.</param>
      /// <param name="value">The value the item holds.</param>
      public DataItem(string key, string value)
      {
          Key = key;
          Value = value;
      }
  }
  329. }
  330. }