PageRenderTime 52ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/encog-core/encog-core-cs/App/Analyst/CSV/Basic/BasicFile.cs

http://encog-cs.googlecode.com/
C# | 539 lines | 276 code | 71 blank | 192 comment | 24 complexity | f893674bf8d3e9893449003f6466c4f0 MD5 | raw file
  1. //
  2. // Encog(tm) Core v3.0 - .Net Version
  3. // http://www.heatonresearch.com/encog/
  4. //
  5. // Copyright 2008-2011 Heaton Research, Inc.
  6. //
  7. // Licensed under the Apache License, Version 2.0 (the "License");
  8. // you may not use this file except in compliance with the License.
  9. // You may obtain a copy of the License at
  10. //
  11. // http://www.apache.org/licenses/LICENSE-2.0
  12. //
  13. // Unless required by applicable law or agreed to in writing, software
  14. // distributed under the License is distributed on an "AS IS" BASIS,
  15. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  16. // See the License for the specific language governing permissions and
  17. // limitations under the License.
  18. //
  19. // For more information on Heaton Research copyrights, licenses
  20. // and trademarks visit:
  21. // http://www.heatonresearch.com/copyright
  22. //
  23. using System;
  24. using System.IO;
  25. using System.Text;
  26. using Encog.App.Analyst.Script;
  27. using Encog.App.Quant;
  28. using Encog.Util.CSV;
  29. namespace Encog.App.Analyst.CSV.Basic
  30. {
  31. /// <summary>
  32. /// Many of the Encog quant CSV processors are based upon this class. This class
  33. /// is not useful on its own. However, it does form the foundation for most Encog
  34. /// CSV file processing.
  35. /// </summary>
  36. ///
  37. public class BasicFile : QuantTask
  38. {
  /// <summary>
  /// Default number of records between status reports; the constructor
  /// copies this value into <c>_reportInterval</c>.
  /// </summary>
  private const int REPORT_INTERVAL = 10000;
  /// <summary>
  /// Whether the file has been analyzed. Exposed through the
  /// <c>Analyzed</c> property and checked by the <c>RecordCount</c> getter
  /// and by <c>ValidateAnalyzed</c>, both of which throw while it is false.
  /// </summary>
  private bool _analyzed;
  /// <summary>
  /// Stop flag. Set by <c>RequestStop</c>, returned by <c>ShouldStop</c>,
  /// and polled by the counting loop in <c>PerformBasicCounts</c>.
  /// </summary>
  private bool _cancel;
  /// <summary>
  /// The number of columns in the input file; <c>PerformBasicCounts</c>
  /// takes it from the CSV reader.
  /// </summary>
  private int _columnCount;
  /// <summary>
  /// Number of records seen since the last <c>ResetStatus</c>; incremented
  /// by every <c>UpdateStatus</c> call.
  /// </summary>
  private int _currentRecord;
  /// <summary>
  /// True if the first line of the input file is expected to hold column
  /// headers.
  /// </summary>
  private bool _expectInputHeaders;
  /// <summary>
  /// The input file. This is the file being analyzed/processed.
  /// </summary>
  private FileInfo _inputFilename;
  /// <summary>
  /// The format of the input file.
  /// </summary>
  private CSVFormat _inputFormat;
  /// <summary>
  /// The column headings for the input file, as filled in by
  /// <c>ReadHeaders</c> or assigned through <c>InputHeadings</c>.
  /// </summary>
  private String[] _inputHeadings;
  /// <summary>
  /// Count of records processed since the last periodic status report.
  /// This is a record counter, not a timestamp: <c>UpdateStatus</c>
  /// increments it and resets it to zero when a periodic report is sent.
  /// </summary>
  private int _lastUpdate;
  /// <summary>
  /// The output format. When still null, <c>PerformBasicCounts</c> and
  /// <c>PrepareOutputFile</c> default it to the input format.
  /// </summary>
  private CSVFormat _outputFormat;
  /// <summary>
  /// Whether <c>PrepareOutputFile</c> writes a header line. The
  /// constructor sets this to true.
  /// </summary>
  private bool _produceOutputHeaders;
  /// <summary>
  /// The number of records in the input file, as counted by
  /// <c>PerformBasicCounts</c>.
  /// </summary>
  private int _recordCount;
  /// <summary>
  /// Receiver of status reports. The constructor installs a
  /// <c>NullStatusReportable</c>.
  /// </summary>
  private IStatusReportable _report;
  /// <summary>
  /// The number of records to process between periodic status reports.
  /// Defaults to <c>REPORT_INTERVAL</c> (10k).
  /// </summary>
  private int _reportInterval;
  /// <summary>
  /// Create the object with its defaults: cleared progress counters,
  /// output headers enabled, the default report interval, a
  /// <c>NullStatusReportable</c> as the status receiver, and the
  /// framework's default precision.
  /// </summary>
  public BasicFile()
  {
      ResetStatus();
      _produceOutputHeaders = true;
      _reportInterval = REPORT_INTERVAL;
      _report = new NullStatusReportable();
      Precision = EncogFramework.DefaultPrecision;
  }
  /// <summary>
  /// Get or set the number of columns in the input file.
  /// </summary>
  public int Count
  {
      get
      {
          return _columnCount;
      }
      set
      {
          _columnCount = value;
      }
  }

  /// <summary>
  /// Get or set the input file.
  /// </summary>
  public FileInfo InputFilename
  {
      get
      {
          return _inputFilename;
      }
      set
      {
          _inputFilename = value;
      }
  }

  /// <summary>
  /// Get or set the format of the input file.
  /// </summary>
  public CSVFormat InputFormat
  {
      get
      {
          return _inputFormat;
      }
      set
      {
          _inputFormat = value;
      }
  }

  /// <summary>
  /// Get or set the column headings of the input file.
  /// </summary>
  public string[] InputHeadings
  {
      get
      {
          return _inputHeadings;
      }
      set
      {
          _inputHeadings = value;
      }
  }

  /// <summary>
  /// Get or set the format used for output.
  /// </summary>
  public CSVFormat OutputFormat
  {
      get
      {
          return _outputFormat;
      }
      set
      {
          _outputFormat = value;
      }
  }

  /// <summary>
  /// Get or set the precision to use.
  /// </summary>
  public int Precision { get; set; }

  /// <summary>
  /// Get or set the record count. Reading the value is only permitted
  /// once the file has been flagged as analyzed; writing is always allowed.
  /// </summary>
  /// <exception cref="QuantError">
  /// Thrown by the getter when the file has not been analyzed yet.
  /// </exception>
  public int RecordCount
  {
      get
      {
          if (_analyzed)
          {
              return _recordCount;
          }

          throw new QuantError("Must analyze file first.");
      }
      set
      {
          _recordCount = value;
      }
  }

  /// <summary>
  /// Get or set the object that receives status reports.
  /// </summary>
  public IStatusReportable Report
  {
      get
      {
          return _report;
      }
      set
      {
          _report = value;
      }
  }

  /// <summary>
  /// Get or set the number of records between periodic status reports.
  /// </summary>
  public int ReportInterval
  {
      get
      {
          return _reportInterval;
      }
      set
      {
          _reportInterval = value;
      }
  }

  /// <summary>
  /// Get or set the flag recording that the file has been analyzed.
  /// </summary>
  public bool Analyzed
  {
      get
      {
          return _analyzed;
      }
      set
      {
          _analyzed = value;
      }
  }

  /// <summary>
  /// Get or set whether the input file is expected to start with a
  /// header line.
  /// </summary>
  public bool ExpectInputHeaders
  {
      get
      {
          return _expectInputHeaders;
      }
      set
      {
          _expectInputHeaders = value;
      }
  }

  /// <summary>
  /// Get or set whether a header line is written to the output file.
  /// </summary>
  public bool ProduceOutputHeaders
  {
      get
      {
          return _produceOutputHeaders;
      }
      set
      {
          _produceOutputHeaders = value;
      }
  }

  /// <summary>
  /// Get or set the analyst script. When present, its fields supply
  /// column names for input files that have no header line.
  /// </summary>
  public AnalystScript Script { get; set; }
  226. #region QuantTask Members
  /// <summary>
  /// Request a stop by raising the cancel flag. The flag is returned by
  /// <c>ShouldStop</c> and is polled by the record-counting loop in
  /// <c>PerformBasicCounts</c>. No code visible in this class clears the
  /// flag again once it has been set.
  /// </summary>
  public void RequestStop()
  {
      _cancel = true;
  }
  /// <summary>
  /// Report whether a stop has been requested.
  /// </summary>
  /// <returns>True once <c>RequestStop</c> has been called.</returns>
  public bool ShouldStop()
  {
      return _cancel;
  }
  240. #endregion
  /// <summary>
  /// Append a separator to a line that is being built as a separated
  /// list. Nothing is appended when the line is empty or when it already
  /// ends with the separator.
  /// </summary>
  /// <remarks>
  /// Because of the "already ends with the separator" rule, no separator
  /// is added after an empty value; callers that must preserve empty
  /// fields should insert separators themselves.
  /// </remarks>
  /// <param name="line">The line to append to.</param>
  /// <param name="format">The format that supplies the separator.</param>
  public static void AppendSeparator(StringBuilder line,
                                     CSVFormat format)
  {
      if (line.Length == 0)
      {
          return;
      }

      // Compare only the tail of the builder. Converting the whole builder
      // to a string on every call made row building quadratic, and the
      // string EndsWith overload compares using the current culture.
      string separator = format.Separator + "";
      int offset = line.Length - separator.Length;
      bool endsWithSeparator = offset >= 0;
      for (int i = 0; endsWithSeparator && i < separator.Length; i++)
      {
          endsWithSeparator = line[offset + i] == separator[i];
      }

      if (!endsWithSeparator)
      {
          line.Append(format.Separator);
      }
  }
  /// <summary>
  /// Perform a basic analysis of the input file: count its records and
  /// columns and capture the column headings. Status is reported while
  /// counting and a "Done analyzing" report is sent at the end. The
  /// output format defaults to the input format if it has not been set.
  /// This method is used mostly internally.
  /// </summary>
  /// <remarks>
  /// Counting stops early when a stop has been requested. This method
  /// does not set the analyzed flag itself.
  /// </remarks>
  public void PerformBasicCounts()
  {
      if (_outputFormat == null)
      {
          _outputFormat = _inputFormat;
      }

      ResetStatus();

      var csv = new ReadCSV(_inputFilename.ToString(),
                            _expectInputHeaders, _inputFormat);
      try
      {
          int rc = 0;
          while (csv.Next() && !_cancel)
          {
              UpdateStatus(true);
              rc++;
          }

          _recordCount = rc;
          _columnCount = csv.ColumnCount;
          ReadHeaders(csv);
      }
      finally
      {
          // Release the reader even when reading or header extraction
          // throws; previously the file stayed open on failure.
          csv.Close();
      }

      ReportDone(true);
  }
  /// <summary>
  /// Prepare the output file: delete any existing file, open it for
  /// writing and, if output headers are enabled, write the header line.
  /// The output format defaults to the input format if it has not been
  /// set.
  /// </summary>
  /// <remarks>
  /// Headers are written quoted. When no input headings are known, the
  /// columns are named "field:1", "field:2", ... up to the column count.
  /// </remarks>
  /// <param name="outputFile">The output file.</param>
  /// <returns>
  /// The open writer for the output file; the caller is responsible for
  /// closing it.
  /// </returns>
  /// <exception cref="QuantError">Wraps any IOException raised.</exception>
  public StreamWriter PrepareOutputFile(FileInfo outputFile)
  {
      StreamWriter tw = null;
      bool succeeded = false;
      try
      {
          outputFile.Delete();
          tw = new StreamWriter(outputFile.OpenWrite());
          if (_outputFormat == null)
          {
              _outputFormat = _inputFormat;
          }

          // write headers, if needed
          if (_produceOutputHeaders)
          {
              var line = new StringBuilder();
              if (_inputHeadings != null)
              {
                  foreach (String str in _inputHeadings)
                  {
                      if (line.Length > 0)
                      {
                          line.Append(_outputFormat.Separator);
                      }
                      line.Append("\"");
                      line.Append(str);
                      line.Append("\"");
                  }
              }
              else
              {
                  for (int i = 0; i < _columnCount; i++)
                  {
                      // The generated names used to be written back to
                      // back with no separator, fusing all columns into one.
                      if (i > 0)
                      {
                          line.Append(_outputFormat.Separator);
                      }
                      line.Append("\"field:");
                      line.Append(i + 1);
                      line.Append("\"");
                  }
              }
              tw.WriteLine(line.ToString());
          }

          succeeded = true;
          return tw;
      }
      catch (IOException e)
      {
          throw new QuantError(e);
      }
      finally
      {
          // Do not leak the open file handle when header writing fails.
          if (!succeeded && tw != null)
          {
              tw.Close();
          }
      }
  }
  /// <summary>
  /// Determine the input column headings for a CSV file. Used mostly
  /// internally.
  /// </summary>
  /// <remarks>
  /// When input headers are expected, the names come from the reader.
  /// Otherwise the names of the script's fields are used, in order, for
  /// as many columns as the file has, and any remaining columns are named
  /// "field:" followed by their zero-based index.
  /// </remarks>
  /// <param name="csv">The CSV file to read from.</param>
  public void ReadHeaders(ReadCSV csv)
  {
      int columnCount = csv.ColumnCount;
      var headings = new String[columnCount];

      if (_expectInputHeaders)
      {
          for (int i = 0; i < columnCount; i++)
          {
              headings[i] = csv.ColumnNames[i];
          }
      }
      else
      {
          int next = 0;
          if (Script != null)
          {
              foreach (DataField field in Script.Fields)
              {
                  // A script may define more fields than the file has
                  // columns; stop instead of overrunning the array.
                  if (next >= columnCount)
                  {
                      break;
                  }
                  headings[next++] = field.Name;
              }
          }

          for (; next < columnCount; next++)
          {
              headings[next] = "field:" + next;
          }
      }

      _inputHeadings = headings;
  }
  /// <summary>
  /// Send the final status report, with the current position equal to
  /// the total record count. Used internally.
  /// </summary>
  /// <param name="isAnalyzing">
  /// True to report "Done analyzing", false to report "Done processing".
  /// </param>
  public void ReportDone(bool isAnalyzing)
  {
      string message;
      if (isAnalyzing)
      {
          message = "Done analyzing";
      }
      else
      {
          message = "Done processing";
      }

      _report.Report(_recordCount, _recordCount, message);
  }
  /// <summary>
  /// Send the final status report with a caller-supplied message; the
  /// current position is reported as equal to the total record count.
  /// Used internally.
  /// </summary>
  /// <param name="task">The message to report.</param>
  public void ReportDone(String task)
  {
      int total = _recordCount;
      _report.Report(total, total, task);
  }
  /// <summary>
  /// Clear the progress counters: both the current record number and the
  /// count of records since the last report go back to zero. Used
  /// internally.
  /// </summary>
  public void ResetStatus()
  {
      _lastUpdate = _currentRecord = 0;
  }
  /// <summary>
  /// Describe this object as
  /// "[TypeName inputFilename=..., recordCount=...]", using the runtime
  /// type's name.
  /// </summary>
  /// <returns>The description string.</returns>
  public override sealed String ToString()
  {
      return "[" + GetType().Name
             + " inputFilename=" + _inputFilename
             + ", recordCount=" + _recordCount
             + "]";
  }
  /// <summary>
  /// Record that one more record was handled, using a standard task
  /// name. Used internally.
  /// </summary>
  /// <param name="isAnalyzing">
  /// True to report the task as "Analyzing", false as "Processing".
  /// </param>
  public void UpdateStatus(bool isAnalyzing)
  {
      if (isAnalyzing)
      {
          UpdateStatus("Analyzing");
      }
      else
      {
          UpdateStatus("Processing");
      }
  }
  /// <summary>
  /// Record that one more record was handled and report status when due.
  /// A report is sent for the first record after a reset and then once
  /// every <c>ReportInterval</c> records.
  /// </summary>
  /// <param name="task">The task name to report.</param>
  public void UpdateStatus(String task)
  {
      bool isFirstRecord = _currentRecord == 0;

      _currentRecord++;
      _lastUpdate++;

      // ">=" rather than ">": the old comparison let interval + 1 records
      // pass between reports, so an interval of 1 reported only every
      // second record and the reported positions drifted.
      bool intervalElapsed = _lastUpdate >= _reportInterval;
      if (intervalElapsed)
      {
          _lastUpdate = 0;
      }

      if (isFirstRecord || intervalElapsed)
      {
          _report.Report(_recordCount, _currentRecord, task);
      }
  }
  /// <summary>
  /// Ensure that the file has been flagged as analyzed.
  /// </summary>
  /// <exception cref="QuantError">
  /// Thrown when the file has not been analyzed yet.
  /// </exception>
  public void ValidateAnalyzed()
  {
      if (_analyzed)
      {
          return;
      }

      throw new QuantError("File must be analyzed first.");
  }
  /// <summary>
  /// Write a row to the output file as one line, with the values joined
  /// by the output format's separator (the input format's separator is
  /// used when no output format has been set).
  /// </summary>
  /// <remarks>
  /// A separator is written between every pair of values, including
  /// empty ones, so the number of columns in the output always matches
  /// the row. Values are written as-is, without quoting.
  /// </remarks>
  /// <param name="tw">The output stream.</param>
  /// <param name="row">The row to write out.</param>
  public void WriteRow(StreamWriter tw, LoadedRow row)
  {
      CSVFormat format = _outputFormat ?? _inputFormat;
      var line = new StringBuilder();
      bool first = true;

      foreach (string t in row.Data)
      {
          // Separators are placed by position, not by inspecting the end
          // of the line: the latter skipped the separator after an empty
          // value (and before a leading empty value), shifting columns.
          if (!first)
          {
              line.Append(format.Separator);
          }
          line.Append(t);
          first = false;
      }

      tw.WriteLine(line.ToString());
  }
  467. }
  468. }