PageRenderTime 56ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/Languages/IronPython/IronPython.Modules/_csv.cs

http://github.com/IronLanguages/main
C# | 1220 lines | 1034 code | 149 blank | 37 comment | 279 complexity | 325a960b5b78e0bc10e2e5f317c5e5d1 MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Runtime.CompilerServices;
  5. using System.Text;
  6. using IronPython.Runtime;
  7. using IronPython.Runtime.Exceptions;
  8. using IronPython.Runtime.Operations;
  9. using IronPython.Runtime.Types;
  10. using Microsoft.Scripting;
  11. using Microsoft.Scripting.Runtime;
  12. [assembly: PythonModule("_csv", typeof(IronPython.Modules.PythonCsvModule))]
  13. namespace IronPython.Modules
  14. {
  15. using DialectRegistry = Dictionary<string, PythonCsvModule.Dialect>;
  16. public static class PythonCsvModule
  17. {
  18. public const string __doc__ = "";
  19. public const string __version__ = "1.0";
  20. public const int QUOTE_MINIMAL = 0;
  21. public const int QUOTE_ALL = 1;
  22. public const int QUOTE_NONNUMERIC = 2;
  23. public const int QUOTE_NONE = 3;
  24. private static readonly object _fieldSizeLimitKey = new object();
  25. private static readonly object _dialectRegistryKey = new object();
  26. private const int FieldSizeLimit = 128 * 1024; /* max parsed field size */
  /// <summary>
  /// Module (re)load hook. Seeds the per-context field size limit and the
  /// dialect registry when they are not present yet, then registers the
  /// module's exception type in <paramref name="dict"/>.
  /// </summary>
  /// <param name="context">Python context that owns the module state.</param>
  /// <param name="dict">Module dictionary handed to the exception setup.</param>
  [SpecialName]
  public static void PerformModuleReload(PythonContext context, PythonDictionary dict)
  {
      bool limitPresent = context.HasModuleState(_fieldSizeLimitKey);
      if (!limitPresent)
          context.SetModuleState(_fieldSizeLimitKey, FieldSizeLimit);

      bool registryPresent = context.HasModuleState(_dialectRegistryKey);
      if (!registryPresent)
      {
          DialectRegistry emptyRegistry = new DialectRegistry();
          context.SetModuleState(_dialectRegistryKey, emptyRegistry);
      }

      InitModuleExceptions(context, dict);
  }
  /// <summary>
  /// Replaces the maximum parsed field size and returns the previous limit.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <param name="new_limit">New maximum field length.</param>
  /// <returns>The limit that was in effect before the call.</returns>
  public static int field_size_limit(CodeContext /*!*/ context, int new_limit)
  {
      // Read through the shared helper so a context whose state was never
      // seeded yields the default instead of failing on a null unboxing cast.
      int old_limit = GetFieldSizeLimit(context);
      PythonContext.GetContext(context).SetModuleState(_fieldSizeLimitKey, new_limit);
      return old_limit;
  }
  /// <summary>
  /// Returns the current maximum parsed field size.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <returns>The limit stored for this context, or the default when none was stored yet.</returns>
  public static int field_size_limit(CodeContext/*!*/ context)
  {
      // Delegate to the helper that initialises the state on first use,
      // rather than unboxing a possibly missing module-state entry.
      return GetFieldSizeLimit(context);
  }
  [Documentation(@"Create a mapping from a string name to a dialect class.
dialect = csv.register_dialect(name, dialect)")]
  public static void register_dialect(CodeContext/*!*/ context,
      [ParamDictionary] IDictionary<object, object> kwArgs,
      params object[] args)
  {
      // Positional arguments: (name [, dialect]).
      int argc = args.Length;
      if (argc < 1)
          throw PythonOps.TypeError("expected at least 1 arguments, got {0}", argc);
      if (argc > 2)
          throw PythonOps.TypeError("expected at most 2 arguments, got {0}", argc);

      string name = args[0] as string;
      if (name == null)
          throw PythonOps.TypeError("dialect name must be a string or unicode");

      object dialectObj = argc > 1 ? args[1] : null;

      // Without a base dialect object the dialect is built from keywords only.
      Dialect created;
      if (dialectObj == null)
          created = Dialect.Create(context, kwArgs);
      else
          created = Dialect.Create(context, kwArgs, dialectObj);

      if (created != null)
          GetDialects(context)[name] = created;
  }
  /// <summary>
  /// Fetches the dialect registry stored in the Python context behind
  /// <paramref name="context"/>, creating an empty registry first when
  /// none has been stored yet.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <returns>The registry mapping dialect names to dialects.</returns>
  private static DialectRegistry GetDialects(CodeContext/*!*/ context)
  {
      PythonContext pyContext = PythonContext.GetContext(context);

      bool missing = !pyContext.HasModuleState(_dialectRegistryKey);
      if (missing)
          pyContext.SetModuleState(_dialectRegistryKey, new DialectRegistry());

      object stored = pyContext.GetModuleState(_dialectRegistryKey);
      return (DialectRegistry)stored;
  }
  /// <summary>
  /// Reads the field size limit stored for this context, storing the
  /// default (<c>FieldSizeLimit</c>) first when no value is present.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <returns>The maximum number of characters allowed in a parsed field.</returns>
  private static int GetFieldSizeLimit(CodeContext/*!*/ context)
  {
      PythonContext pyContext = PythonContext.GetContext(context);

      bool missing = !pyContext.HasModuleState(_fieldSizeLimitKey);
      if (missing)
          pyContext.SetModuleState(_fieldSizeLimitKey, FieldSizeLimit);

      object stored = pyContext.GetModuleState(_fieldSizeLimitKey);
      return (int)stored;
  }
  [Documentation(@"Delete the name/dialect mapping associated with a string name.
csv.unregister_dialect(name)")]
  public static void unregister_dialect(CodeContext/*!*/ context,
      string name)
  {
      // The docstring above no longer carries a literal "\n": inside a
      // verbatim string that is a backslash and an 'n', not a newline.
      DialectRegistry dialects = GetDialects(context);

      // Remove reports whether the key existed, so a separate ContainsKey
      // lookup is unnecessary. The null guard comes first because the
      // dictionary rejects null keys with ArgumentNullException.
      if (name == null || !dialects.Remove(name))
          throw MakeError("unknown dialect");
  }
  [Documentation(@"Return the dialect instance associated with name.
dialect = csv.get_dialect(name)")]
  public static object get_dialect(CodeContext/*!*/ context, string name)
  {
      DialectRegistry registry = GetDialects(context);

      // A null name can never be registered; treat it like any unknown name.
      Dialect registered;
      bool known = name != null && registry.TryGetValue(name, out registered);
      if (!known)
          throw MakeError("unknown dialect");

      return registry[name];
  }
  [Documentation(@"Return a list of all know dialect names
names = csv.list_dialects()")]
  public static List list_dialects(CodeContext/*!*/ context)
  {
      // Copy the registry's keys into a fresh Python list.
      DialectRegistry registry = GetDialects(context);
      List names = new List(registry.Keys);
      return names;
  }
  [Documentation(@"csv_reader = reader(iterable [, dialect='excel']
[optional keyword args])
for row in csv_reader:
process(row)
The ""iterable"" argument can be any object that returns a line
of input for each iteration, such as a file object or a list. The
optional ""dialect"" parameter is discussed below. The function
also accepts optional keyword arguments which override settings
provided by the dialect.
The returned object is an iterator. Each iteration returns a row
of the CSV file (which can span multiple input lines)")]
  public static object reader(CodeContext/*!*/ context,
      [ParamDictionary] IDictionary<object, object> kwArgs,
      params object[] args)
  {
      DialectRegistry registry = GetDialects(context);

      // Positional arguments: (iterable [, dialect]).
      int argc = args.Length;
      if (argc < 1)
          throw PythonOps.TypeError("expected at least 1 arguments, got {0}", argc);
      if (argc > 2)
          throw PythonOps.TypeError("expected at most 2 arguments, got {0}", argc);

      IEnumerator lines;
      if (!PythonOps.TryGetEnumerator(context, args[0], out lines))
          throw PythonOps.TypeError("argument 1 must be an iterator");

      object dialectObj = argc > 1 ? args[1] : null;

      // A string dialect is a registry name; swap it for the registered entry.
      string dialectName = dialectObj as string;
      if (dialectName != null)
      {
          Dialect registered;
          if (!registry.TryGetValue(dialectName, out registered))
              throw MakeError("unknown dialect");
          dialectObj = registered;
      }

      Dialect resolved;
      if (dialectObj == null)
          resolved = Dialect.Create(context, kwArgs);
      else
          resolved = Dialect.Create(context, kwArgs, dialectObj);

      return new Reader(context, lines, resolved);
  }
  /// <summary>
  /// Creates a CSV writer over <c>args[0]</c> (an object exposing a callable
  /// <c>write</c> attribute), using the optional dialect in <c>args[1]</c>
  /// and any keyword overrides in <paramref name="kwArgs"/>.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <param name="kwArgs">Keyword arguments forwarded to the dialect factory.</param>
  /// <param name="args">Positional arguments: (output_file [, dialect]).</param>
  /// <returns>A new <c>Writer</c>.</returns>
  public static object writer(CodeContext/*!*/ context,
      [ParamDictionary] IDictionary<object, object> kwArgs,
      params object[] args)
  {
      DialectRegistry registry = GetDialects(context);

      int argc = args.Length;
      if (argc < 1)
          throw PythonOps.TypeError("expected at least 1 arguments, got {0}", argc);
      if (argc > 2)
          throw PythonOps.TypeError("expected at most 2 arguments, got {0}", argc);

      object output_file = args[0];
      object dialectObj = argc > 1 ? args[1] : null;

      // A string dialect is a registry name; swap it for the registered entry.
      string dialectName = dialectObj as string;
      if (dialectName != null)
      {
          Dialect registered;
          if (!registry.TryGetValue(dialectName, out registered))
              throw MakeError("unknown dialect");
          dialectObj = registered;
      }

      Dialect resolved;
      if (dialectObj == null)
          resolved = Dialect.Create(context, kwArgs);
      else
          resolved = Dialect.Create(context, kwArgs, dialectObj);

      return new Writer(context, output_file, resolved);
  }
  218. [Documentation(@"CSV dialect
  219. The Dialect type records CSV parsing and generation options.")]
  220. [PythonType]
  221. public class Dialect
  222. {
  223. private string _delimiter = ",";
  224. private string _escapechar = null;
  225. private bool _skipinitialspace;
  226. private bool _doublequote = true;
  227. private bool _strict;
  228. private int _quoting = QUOTE_MINIMAL;
  229. private string _quotechar = "\"";
  230. private string _lineterminator = "\r\n";
  231. private static readonly string[] VALID_KWARGS = {
  232. "dialect",
  233. "delimiter",
  234. "doublequote",
  235. "escapechar",
  236. "lineterminator",
  237. "quotechar",
  238. "quoting",
  239. "skipinitialspace",
  240. "strict"};
  241. private Dialect()
  242. {
  243. }
  /// <summary>
  /// Factory for dialects. The base dialect comes from <c>args[0]</c> or,
  /// failing that, from the "dialect" keyword; a string base is looked up
  /// in the registry. An existing <c>Dialect</c> is returned unchanged when
  /// no option keyword carries a non-null value; otherwise a new instance
  /// is constructed.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <param name="kwArgs">Keyword arguments (dialect and option overrides).</param>
  /// <param name="args">Optional positional base dialect.</param>
  /// <returns>The resolved or newly built dialect.</returns>
  public static Dialect Create(CodeContext/*!*/ context,
      [ParamDictionary] IDictionary<object, object> kwArgs,
      params object[] args)
  {
      DialectRegistry registry = GetDialects(context);

      object baseDialect = args.Length > 0 ? args[0] : null;
      if (baseDialect == null)
          kwArgs.TryGetValue("dialect", out baseDialect);

      // Does any option keyword carry a non-null value?
      string[] optionNames = {
          "delimiter", "doublequote", "escapechar", "lineterminator",
          "quotechar", "quoting", "skipinitialspace", "strict" };
      bool overridden = false;
      foreach (string option in optionNames)
      {
          object value;
          kwArgs.TryGetValue(option, out value);
          if (value != null)
              overridden = true;
      }

      // A string base dialect names a registry entry.
      string dialectName = baseDialect as string;
      if (dialectName != null)
      {
          Dialect registered;
          if (!registry.TryGetValue(dialectName, out registered))
              throw MakeError("unknown dialect");
          baseDialect = registered;
      }

      Dialect existing = baseDialect as Dialect;
      if (existing != null && !overridden)
          return existing;

      if (baseDialect == null)
          return new Dialect(context, kwArgs);
      return new Dialect(context, kwArgs, baseDialect);
  }
  /// <summary>
  /// Attribute-deletion hook; it never succeeds and always throws.
  /// </summary>
  /// <param name="context">Calling code context (unused).</param>
  /// <param name="name">Name of the attribute being deleted.</param>
  [SpecialName]
  public void DeleteMember(CodeContext/*!*/ context, string name)
  {
      // switch compares strings ordinally. The previous string.Compare calls
      // were culture-sensitive, which can treat a name containing ignorable
      // characters (e.g. a soft hyphen) as equal to a real attribute name.
      switch (name)
      {
          case "delimiter":
          case "skipinitialspace":
          case "doublequote":
          case "strict":
              throw PythonOps.TypeError("readonly attribute");

          case "escapechar":
          case "lineterminator":
          case "quotechar":
          case "quoting":
              throw PythonOps.AttributeError("attribute '{0}' of " +
                  "'_csv.Dialect' objects is not writable", name);

          default:
              // Also reached for a null name, as before.
              throw PythonOps.AttributeError("'_csv.Dialect' object " +
                  "has no attribute '{0}'", name);
      }
  }
  /// <summary>
  /// Attribute-assignment hook; it never succeeds and always throws.
  /// </summary>
  /// <param name="context">Calling code context (unused).</param>
  /// <param name="name">Name of the attribute being assigned.</param>
  /// <param name="value">Value being assigned (ignored).</param>
  [SpecialName]
  public void SetMember(CodeContext/*!*/ context, string name, object value)
  {
      // switch compares strings ordinally. The previous string.Compare calls
      // were culture-sensitive, which can treat a name containing ignorable
      // characters (e.g. a soft hyphen) as equal to a real attribute name.
      switch (name)
      {
          case "delimiter":
          case "skipinitialspace":
          case "doublequote":
          case "strict":
              throw PythonOps.TypeError("readonly attribute");

          case "escapechar":
          case "lineterminator":
          case "quotechar":
          case "quoting":
              throw PythonOps.AttributeError("attribute '{0}' of " +
                  "'_csv.Dialect' objects is not writable", name);

          default:
              // Also reached for a null name, as before.
              throw PythonOps.AttributeError("'_csv.Dialect' object " +
                  "has no attribute '{0}'", name);
      }
  }
  346. #region Parameter Setting
  /// <summary>
  /// Resolves an integer option: the default when the option was not
  /// supplied, otherwise the supplied value, which must be an <c>int</c>.
  /// </summary>
  /// <param name="name">Option name used in the error message.</param>
  /// <param name="src">Supplied value (meaningful only when <paramref name="found"/>).</param>
  /// <param name="found">Whether the option was supplied.</param>
  /// <param name="default">Value returned when the option was not supplied.</param>
  static int SetInt(string name, object src, bool found, int @default)
  {
      if (!found)
          return @default;

      if (src is int)
          return (int)src;

      throw PythonOps.TypeError("\"{0}\" must be an integer",
          name);
  }
  /// <summary>
  /// Resolves a boolean option: the default when the option was not
  /// supplied, otherwise the Python truth value of the supplied object.
  /// </summary>
  /// <param name="name">Option name (unused).</param>
  /// <param name="src">Supplied value (meaningful only when <paramref name="found"/>).</param>
  /// <param name="found">Whether the option was supplied.</param>
  /// <param name="default">Value returned when the option was not supplied.</param>
  static bool SetBool(string name, object src, bool found, bool @default)
  {
      return found ? PythonOps.IsTrue(src) : @default;
  }
  /// <summary>
  /// Resolves a single-character option. Not supplied gives the default;
  /// null or an empty string gives null; a one-character string is
  /// returned as is; anything else raises a TypeError.
  /// </summary>
  /// <param name="name">Option name used in error messages.</param>
  /// <param name="src">Supplied value (meaningful only when <paramref name="found"/>).</param>
  /// <param name="found">Whether the option was supplied.</param>
  /// <param name="default">Value returned when the option was not supplied.</param>
  static string SetChar(string name, object src, bool found, string @default)
  {
      if (!found)
          return @default;
      if (src == null)
          return null;

      string text = src as string;
      if (text == null)
      {
          throw PythonOps.TypeError(
              "\"{0}\" must be string, not {1}", name, PythonOps.GetPythonTypeName(src));
      }

      switch (text.Length)
      {
          case 0:
              return null;
          case 1:
              return text;
          default:
              throw PythonOps.TypeError(
                  "\"{0}\" must be an 1-character string",
                  name);
      }
  }
  /// <summary>
  /// Resolves a string option: the default when the option was not
  /// supplied, null when it is null, the string itself when it is a string,
  /// and a TypeError for any other type.
  /// </summary>
  /// <param name="name">Option name used in the error message.</param>
  /// <param name="src">Supplied value (meaningful only when <paramref name="found"/>).</param>
  /// <param name="found">Whether the option was supplied.</param>
  /// <param name="default">Value returned when the option was not supplied.</param>
  static string SetString(string name, object src, bool found, string @default)
  {
      if (!found)
          return @default;
      if (src == null)
          return null;

      string text = src as string;
      if (text != null)
          return text;

      throw PythonOps.TypeError(
          "\"{0}\" must be a string", name);
  }
  416. #endregion
  /// <summary>
  /// Builds a dialect from keyword options and an optional base dialect
  /// object (<c>args[0]</c> or the "dialect" keyword). Each option is taken
  /// from the keywords when present, otherwise from the same-named attribute
  /// of the base dialect, otherwise from its default. The combination is
  /// then validated.
  /// </summary>
  /// <param name="context">Calling code context.</param>
  /// <param name="kwArgs">Keyword arguments; unknown keys raise a TypeError.</param>
  /// <param name="args">Optional positional base dialect.</param>
  public Dialect(CodeContext/*!*/ context,
      [ParamDictionary] IDictionary<object, object> kwArgs,
      params object[] args)
  {
      // Reject keywords that are not dialect options.
      foreach (object key in kwArgs.Keys)
      {
          if (Array.IndexOf(VALID_KWARGS, key) < 0)
          {
              throw PythonOps.TypeError("'{0}' is an invalid " +
                  "keyword argument for this function", key);
          }
      }

      // Base dialect: positional argument first, then the keyword.
      object dialect = args.Length > 0 ? args[0] : null;
      if (dialect == null)
          kwArgs.TryGetValue("dialect", out dialect);

      object delimiter, doublequote, escapechar, lineterminator;
      object quotechar, quoting, skipinitialspace, strict;

      bool hasDelimiter = ResolveOption(kwArgs, dialect, "delimiter", out delimiter);
      bool hasDoublequote = ResolveOption(kwArgs, dialect, "doublequote", out doublequote);
      bool hasEscapechar = ResolveOption(kwArgs, dialect, "escapechar", out escapechar);
      bool hasLineterminator = ResolveOption(kwArgs, dialect, "lineterminator", out lineterminator);
      bool hasQuotechar = ResolveOption(kwArgs, dialect, "quotechar", out quotechar);
      bool hasQuoting = ResolveOption(kwArgs, dialect, "quoting", out quoting);
      bool hasSkipinitialspace = ResolveOption(kwArgs, dialect, "skipinitialspace", out skipinitialspace);
      bool hasStrict = ResolveOption(kwArgs, dialect, "strict", out strict);

      // Convert each option; the order fixes which error surfaces first.
      _delimiter = SetChar("delimiter", delimiter, hasDelimiter, ",");
      _doublequote = SetBool("doublequote", doublequote, hasDoublequote, true);
      _escapechar = SetString("escapechar", escapechar, hasEscapechar, null);
      _lineterminator = SetString("lineterminator", lineterminator, hasLineterminator, "\r\n");
      _quotechar = SetChar("quotechar", quotechar, hasQuotechar, "\"");
      _quoting = SetInt("quoting", quoting, hasQuoting, QUOTE_MINIMAL);
      _skipinitialspace = SetBool("skipinitialspace", skipinitialspace, hasSkipinitialspace, false);
      _strict = SetBool("strict", strict, hasStrict, false);

      // validate options
      bool quotingInRange = _quoting >= QUOTE_MINIMAL && _quoting <= QUOTE_NONE;
      if (!quotingInRange)
          throw PythonOps.TypeError("bad \"quoting\" value");

      if (string.IsNullOrEmpty(_delimiter))
          throw PythonOps.TypeError("\"delimiter\" must be an 1-character string");

      // An explicit null quotechar with no quoting value disables quoting.
      if (hasQuotechar && quotechar == null && quoting == null)
          _quoting = QUOTE_NONE;

      if (_quoting != QUOTE_NONE && string.IsNullOrEmpty(_quotechar))
          throw PythonOps.TypeError("quotechar must be set if quoting enabled");

      if (_lineterminator == null)
          throw PythonOps.TypeError("lineterminator must be set");
  }

  /// <summary>
  /// Looks an option up in the keyword arguments and, when it is absent
  /// there and a base dialect exists, in the base dialect's attributes.
  /// </summary>
  private static bool ResolveOption(IDictionary<object, object> kwArgs,
      object dialect, string option, out object value)
  {
      bool found = kwArgs.TryGetValue(option, out value);
      if (dialect != null && !found && value == null)
          found = PythonOps.TryGetBoundAttr(dialect, option, out value);
      return found;
  }
  /// <summary>Escape string, or null when none is set.</summary>
  public string escapechar
  {
      get
      {
          return this._escapechar;
      }
  }

  /// <summary>Field delimiter.</summary>
  public string delimiter
  {
      get
      {
          return this._delimiter;
      }
  }

  /// <summary>Whether a space at the start of a field is skipped by the reader.</summary>
  public bool skipinitialspace
  {
      get
      {
          return this._skipinitialspace;
      }
  }

  /// <summary>Whether a quote inside a quoted field is represented by two quotes.</summary>
  public bool doublequote
  {
      get
      {
          return this._doublequote;
      }
  }

  /// <summary>String the writer appends after each record.</summary>
  public string lineterminator
  {
      get
      {
          return this._lineterminator;
      }
  }

  /// <summary>Whether the reader raises errors on malformed input.</summary>
  public bool strict
  {
      get
      {
          return this._strict;
      }
  }

  /// <summary>Quoting mode (one of the QUOTE_* constants).</summary>
  public int quoting
  {
      get
      {
          return this._quoting;
      }
  }

  /// <summary>Quote character, or null when none is set.</summary>
  public string quotechar
  {
      get
      {
          return this._quotechar;
      }
  }
  536. }
  537. [Documentation(@"CSV reader
  538. Reader objects are responsible for reading and parsing tabular data
  539. in CSV format.")]
  540. [PythonType]
  541. public class Reader : IEnumerable
  542. {
  543. private IEnumerator _input_iter;
  544. private Dialect _dialect;
  545. private int _line_num;
  546. private ReaderIterator _iterator;
  /// <summary>
  /// Creates a reader that pulls lines from <paramref name="input_iter"/>
  /// and parses them according to <paramref name="dialect"/>.
  /// </summary>
  public Reader(CodeContext/*!*/ context, IEnumerator input_iter,
      Dialect dialect)
  {
      this._dialect = dialect;
      this._input_iter = input_iter;
      // The iterator reads _input_iter in its constructor, so it is built last.
      this._iterator = new ReaderIterator(context, this);
  }
  /// <summary>
  /// Returns the next parsed row, or raises StopIteration when the
  /// underlying iterator has no more rows.
  /// </summary>
  public object next()
  {
      if (_iterator.MoveNext())
          return _iterator.Current;

      throw PythonOps.StopIteration();
  }
  560. #region IEnumerable Members
  /// <summary>Returns the reader's single shared row iterator.</summary>
  public IEnumerator GetEnumerator()
  {
      IEnumerator rows = _iterator;
      return rows;
  }
  565. private sealed class ReaderIterator : IEnumerator, IEnumerable
  566. {
  567. private CodeContext _context;
  568. private Reader _reader;
  569. private List _fields = new List();
  570. private bool _is_numeric_field;
  571. private State _state = State.StartRecord;
  572. private StringBuilder _field = new StringBuilder();
  573. private IEnumerator _iterator;
  /// <summary>Parser states used by ProcessChar.</summary>
  enum State
  {
      StartRecord = 0,         // at the start of a record
      StartField = 1,          // expecting a new field
      EscapedChar = 2,         // escape seen outside a quoted field
      InField = 3,             // inside an unquoted field
      InQuotedField = 4,       // inside a quoted field
      EscapeInQuotedField = 5, // escape seen inside a quoted field
      QuoteInQuotedField = 6,  // quote seen inside a quoted field
      EatCrNl = 7              // consuming CR/LF after a record
  }
  /// <summary>
  /// Binds the iterator to its owning reader and to the reader's input
  /// line enumerator.
  /// </summary>
  public ReaderIterator(CodeContext/*!*/ context, Reader reader)
  {
      this._context = context;
      this._reader = reader;
      this._iterator = reader._input_iter;
  }
  591. #region IEnumerator Members
  /// <summary>
  /// The fields of the current record, copied into a new list on each access.
  /// </summary>
  public object Current
  {
      get
      {
          List snapshot = new List(_fields);
          return snapshot;
      }
  }
  /// <summary>
  /// Parses the next record. Input lines are consumed until the state
  /// machine is back at the start of a record, so one record may span
  /// several lines.
  /// </summary>
  /// <returns>
  /// True when a record is available through <c>Current</c>; false when
  /// the input ended with no pending data.
  /// </returns>
  public bool MoveNext()
  {
      Reset();
      do
      {
          if (!_iterator.MoveNext())
          {
              // End of input OR exception
              if (_field.Length > 0 || _state == State.InQuotedField)
              {
                  if (_reader._dialect.strict)
                      throw MakeError("unexpected end of data");

                  // Non-strict mode: emit what was collected so far.
                  ParseSaveField();
                  return true;
              }
              return false;
          }

          object lineobj = _iterator.Current;
          _reader._line_num++;

          if (lineobj is char)
              lineobj = lineobj.ToString();

          string line = lineobj as string;
          if (line == null)
          {
              // Name the type of the offending value itself. The previous
              // code called lineobj.GetType(), which throws
              // NullReferenceException for a null line, and then asked for
              // the Python type of the System.Type object instead of the
              // value.
              throw PythonOps.TypeError("expected string or " +
                  "Unicode object, {0} found",
                  PythonOps.GetPythonTypeName(lineobj));
          }

          for (int i = 0; i < line.Length; i++)
          {
              char c = line[i];
              if (c == '\0')
                  throw MakeError("line contains NULL byte");
              ProcessChar(c);
          }

          // '\0' marks the end of the line for the state machine.
          ProcessChar('\0');
      } while (_state != State.StartRecord);

      return true;
  }
  /// <summary>
  /// Discards the collected fields and the partial field, and puts the
  /// parser back at the start of a record. The input enumerator is not rewound.
  /// </summary>
  public void Reset()
  {
      _fields.Clear();
      _field.Length = 0;
      _is_numeric_field = false;
      _state = State.StartRecord;
  }
  652. #endregion
  653. #region IEnumerable Members
  /// <summary>The iterator enumerates itself.</summary>
  public IEnumerator GetEnumerator()
  {
      IEnumerator self = this;
      return self;
  }
  658. #endregion
  /// <summary>
  /// Feeds one character to the CSV state machine. <c>'\0'</c> is the
  /// end-of-line marker that <c>MoveNext</c> sends after each input line.
  /// Completed fields are stored through <c>ParseSaveField</c>; a record is
  /// complete when the state returns to <c>State.StartRecord</c>.
  /// </summary>
  /// <param name="c">Next input character, or '\0' at end of line.</param>
  private void ProcessChar(char c)
  {
      Dialect dialect = _reader._dialect;
      switch (_state)
      {
          case State.StartRecord:
              // start of record
              if (c == '\0')
              {
                  // empty line, will return empty list
                  break;
              }
              else if (c == '\n' || c == '\r')
              {
                  _state = State.EatCrNl;
                  break;
              }
              // normal character, handle as start of field
              _state = State.StartField;
              goto case State.StartField;

          case State.StartField:
              // expecting field
              if (c == '\n' || c == '\r' || c == '\0')
              {
                  // save empty field - return [fields]
                  ParseSaveField();
                  _state = (c == '\0' ?
                      State.StartRecord : State.EatCrNl);
              }
              else if (!string.IsNullOrEmpty(dialect.quotechar) &&
                  c == dialect.quotechar[0] &&
                  dialect.quoting != QUOTE_NONE)
              {
                  // start quoted field
                  _state = State.InQuotedField;
              }
              else if (!string.IsNullOrEmpty(dialect.escapechar) &&
                  c == dialect.escapechar[0])
              {
                  // possible escaped char
                  _state = State.EscapedChar;
              }
              else if (c == ' ' && dialect.skipinitialspace)
              {
                  // ignore space at start of field
              }
              else if (c == dialect.delimiter[0])
              {
                  // save empty field
                  ParseSaveField();
              }
              else
              {
                  // begin new unquoted field
                  if (dialect.quoting == QUOTE_NONNUMERIC)
                      _is_numeric_field = true;
                  ParseAddChar(c);
                  _state = State.InField;
              }
              break;

          case State.EscapedChar:
              // an escape at end of line stands for a newline
              if (c == '\0')
                  c = '\n';
              ParseAddChar(c);
              _state = State.InField;
              break;

          case State.InField:
              // in unquoted field
              if (c == '\n' || c == '\r' || c == '\0')
              {
                  // end of line, return [fields]
                  ParseSaveField();
                  _state = (c == '\0' ? State.StartRecord : State.EatCrNl);
              }
              else if (!string.IsNullOrEmpty(dialect.escapechar) &&
                  c == dialect.escapechar[0])
              {
                  // possible escaped character
                  _state = State.EscapedChar;
              }
              else if (c == dialect.delimiter[0])
              {
                  // save field - wait for new field
                  ParseSaveField();
                  _state = State.StartField;
              }
              else
              {
                  // normal character - save in field
                  ParseAddChar(c);
              }
              break;

          case State.InQuotedField:
              // in quoted field
              if (c == '\0')
              {
                  // ignore null character
              }
              else if (!string.IsNullOrEmpty(dialect.escapechar) &&
                  c == dialect.escapechar[0])
              {
                  // possible escape character
                  _state = State.EscapeInQuotedField;
              }
              else if (!string.IsNullOrEmpty(dialect.quotechar) &&
                  c == dialect.quotechar[0] &&
                  dialect.quoting != QUOTE_NONE)
              {
                  if (dialect.doublequote)
                  {
                      // doublequote; " represented by ""
                      _state = State.QuoteInQuotedField;
                  }
                  else
                  {
                      // end of quote part of field
                      _state = State.InField;
                  }
              }
              else
              {
                  // normal character - save in field
                  ParseAddChar(c);
              }
              break;

          case State.EscapeInQuotedField:
              // an escape at end of line stands for a newline
              if (c == '\0')
                  c = '\n';
              ParseAddChar(c);
              _state = State.InQuotedField;
              break;

          case State.QuoteInQuotedField:
              // doublequote - seen a quote in a quoted field
              if (dialect.quoting != QUOTE_NONE &&
                  c == dialect.quotechar[0])
              {
                  // save "" as "
                  ParseAddChar(c);
                  _state = State.InQuotedField;
              }
              else if (c == dialect.delimiter[0])
              {
                  // save field - wait for new field
                  ParseSaveField();
                  _state = State.StartField;
              }
              else if (c == '\n' || c == '\r' || c == '\0')
              {
                  // end of line - return [fields]
                  ParseSaveField();
                  _state = (c == '\0' ? State.StartRecord : State.EatCrNl);
              }
              else if (!dialect.strict)
              {
                  ParseAddChar(c);
                  _state = State.InField;
              }
              else
              {
                  // illegal!
                  // MakeError forwards its arguments as exception args and
                  // does not format them, so the message is formatted here
                  // (as ParseAddChar does for its own error).
                  throw MakeError(
                      string.Format("'{0}' expected after '{1}'",
                          dialect.delimiter, dialect.quotechar));
              }
              break;

          case State.EatCrNl:
              if (c == '\n' || c == '\r')
              {
                  // eat the CR NL
              }
              else if (c == '\0')
                  _state = State.StartRecord;
              else
              {
                  throw MakeError("new-line character seen " +
                      "in unquoted field - do you need to open" +
                      " the file in universal-newline mode?");
              }
              break;
      }
  }
  /// <summary>
  /// Appends a character to the field being built, raising the module
  /// error when the field has already reached the configured size limit.
  /// </summary>
  /// <param name="c">Character to append.</param>
  private void ParseAddChar(char c)
  {
      int limit = GetFieldSizeLimit(_context);
      if (_field.Length < limit)
      {
          _field.Append(c);
          return;
      }

      string message = string.Format("field larger than field " +
          "limit ({0})", limit);
      throw MakeError(message);
  }
  /// <summary>
  /// Finishes the field being built and appends it to the current record.
  /// A field flagged as numeric (unquoted under QUOTE_NONNUMERIC) is
  /// converted to a number; any other field is stored as text. The field
  /// buffer is cleared afterwards.
  /// </summary>
  private void ParseSaveField()
  {
      string field = _field.ToString();
      if (_is_numeric_field)
      {
          _is_numeric_field = false;

          // CSV numbers are machine-readable, so parse with the invariant
          // culture; the current culture may use ',' as the decimal
          // separator and reject or misread "1.5". The number styles match
          // what double.TryParse(string, out double) uses by default.
          double number;
          bool parsed = double.TryParse(
              field,
              System.Globalization.NumberStyles.Float |
                  System.Globalization.NumberStyles.AllowThousands,
              System.Globalization.CultureInfo.InvariantCulture,
              out number);
          if (!parsed)
          {
              throw PythonOps.ValueError(
                  "invalid literal for float(): {0}", field);
          }

          // Only narrow to int when the value fits: converting NaN,
          // infinity or an out-of-range double to int gives an
          // unspecified result. Such values stay doubles.
          bool fitsInt = number >= int.MinValue && number <= int.MaxValue;
          if (field.Contains(".") || !fitsInt)
              _fields.Add(number);
          else
              _fields.Add((int)number);
      }
      else
      {
          _fields.Add(field);
      }
      _field.Clear();
  }
  874. }
  875. #endregion
  /// <summary>The dialect this reader parses with.</summary>
  public object dialect
  {
      get
      {
          return this._dialect;
      }
  }

  /// <summary>Number of input lines consumed so far.</summary>
  public int line_num
  {
      get
      {
          return this._line_num;
      }
  }
  884. }
  885. [Documentation(@"CSV writer
  886. Writer objects are responsible for generating tabular data
  887. in CSV format from sequence input.")]
  888. [PythonType]
  889. public class Writer
  890. {
  891. private Dialect _dialect;
  892. private object _writeline;
  893. private List<string> _rec = new List<string>();
  894. private int _num_fields;
  /// <summary>
  /// Creates a writer that emits records through the <c>write</c>
  /// attribute of <paramref name="output_file"/>. A TypeError is raised
  /// when that attribute is missing, null or not callable.
  /// </summary>
  public Writer(CodeContext/*!*/ context, object output_file,
      Dialect dialect)
  {
      this._dialect = dialect;

      bool hasWrite = PythonOps.TryGetBoundAttr(
          output_file, "write", out _writeline);
      bool usable = hasWrite &&
          _writeline != null &&
          PythonOps.IsCallable(context, _writeline);

      if (!usable)
      {
          throw PythonOps.TypeError(
              "argument 1 must have a \"write\" method");
      }
  }
  /// <summary>The dialect this writer formats with.</summary>
  public object dialect
  {
      get
      {
          return this._dialect;
      }
  }
  [Documentation(@"writerow(sequence)
Construct and write a CSV record from a sequence of fields. Non-string
elements will be converted to string.")]
  public void writerow(CodeContext/*!*/ context, object sequence)
  {
      IEnumerator fields;
      if (!PythonOps.TryGetEnumerator(context, sequence, out fields))
          throw MakeError("sequence expected");

      // A record with exactly one field needs its empty field quoted.
      bool singleField = PythonOps.Length(sequence) == 1;

      // join all fields in internal buffer
      JoinReset();
      while (fields.MoveNext())
      {
          object field = fields.Current;

          // Decide quoting from the dialect before converting to text.
          int mode = _dialect.quoting;
          bool quoted;
          if (mode == QUOTE_NONNUMERIC)
          {
              quoted = !PythonOps.CheckingConvertToFloat(field) &&
                  !PythonOps.CheckingConvertToInt(field) &&
                  !PythonOps.CheckingConvertToLong(field);
          }
          else
          {
              quoted = mode == QUOTE_ALL;
          }

          string text;
          if (field == null)
              text = string.Empty;
          else if (field is string)
              text = (string)field;
          else if (field is double)
              text = DoubleOps.__repr__(context, (double)field);
          else if (field is float)
              text = SingleOps.__repr__(context, (float)field);
          else
              text = field.ToString();

          JoinAppend(text, quoted, singleField);
      }

      _rec.Add(_dialect.lineterminator);
      string record = string.Concat(_rec.ToArray());
      PythonOps.CallWithContext(context, _writeline, record);
  }
  [Documentation(@"writerows(sequence of sequences)
Construct and write a series of sequences to a csv file. Non-string
elements will be converted to string.")]
  public void writerows(CodeContext/*!*/ context, object sequence)
  {
      IEnumerator rows;
      if (PythonOps.TryGetEnumerator(context, sequence, out rows))
      {
          // Each element is written as one record.
          while (rows.MoveNext())
              writerow(context, rows.Current);
          return;
      }

      throw PythonOps.TypeError(
          "writerows() argument must be iterable");
  }
  /// <summary>Empties the record buffer and resets the field counter.</summary>
  private void JoinReset()
  {
      _rec.Clear();
      _num_fields = 0;
  }
  /// <summary>
  /// Appends one field to the record buffer, preceded by the delimiter
  /// when it is not the first field, applying the dialect's quoting and
  /// escaping rules.
  /// </summary>
  /// <param name="field">Field text (already converted to a string).</param>
  /// <param name="quoted">Whether the caller already requires quoting.</param>
  /// <param name="quote_empty">
  /// Whether an empty field must be quoted (the record has a single field).
  /// </param>
  private void JoinAppend(string field, bool quoted, bool quote_empty)
  {
      // if this is not the first field, we need a field separator
      if (_num_fields > 0)
          _rec.Add(_dialect.delimiter);

      List<char> need_escape = new List<char>();
      if (_dialect.quoting == QUOTE_NONE)
      {
          // Without quoting, every special character must be escaped.
          need_escape.AddRange(_dialect.lineterminator.ToCharArray());
          if (!string.IsNullOrEmpty(_dialect.escapechar))
              need_escape.Add(_dialect.escapechar[0]);
          if (!string.IsNullOrEmpty(_dialect.delimiter))
              need_escape.Add(_dialect.delimiter[0]);
          if (!string.IsNullOrEmpty(_dialect.quotechar))
              need_escape.Add(_dialect.quotechar[0]);
      }
      else
      {
          // With quoting, special characters force the field to be quoted.
          List<char> special = new List<char>();
          special.AddRange(_dialect.lineterminator.ToCharArray());
          if (!string.IsNullOrEmpty(_dialect.delimiter))
              special.Add(_dialect.delimiter[0]);
          if (!string.IsNullOrEmpty(_dialect.escapechar))
              special.Add(_dialect.escapechar[0]);
          if (field.IndexOfAny(special.ToArray()) >= 0)
              quoted = true;

          if (!string.IsNullOrEmpty(_dialect.quotechar) && field.Contains(_dialect.quotechar))
          {
              if (_dialect.doublequote)
              {
                  field = field.Replace(_dialect.quotechar,
                      _dialect.quotechar + _dialect.quotechar);
                  quoted = true;
              }
              else
              {
                  need_escape.Add(_dialect.quotechar[0]);
              }
          }
      }

      // Escape in a single pass over the field. Running one Replace per
      // escape character re-escapes the escape characters inserted by an
      // earlier Replace (e.g. a newline was escaped and the inserted escape
      // character was then doubled).
      if (need_escape.Count > 0 && field.IndexOfAny(need_escape.ToArray()) >= 0)
      {
          if (string.IsNullOrEmpty(_dialect.escapechar))
              throw MakeError("need to escape, but no escapechar set");

          StringBuilder escaped = new StringBuilder(field.Length + 8);
          foreach (char c in field)
          {
              if (need_escape.Contains(c))
                  escaped.Append(_dialect.escapechar);
              escaped.Append(c);
          }
          field = escaped.ToString();
      }

      // If field is empty check if it needs to be quoted
      if (string.IsNullOrEmpty(field) && quote_empty)
      {
          if (_dialect.quoting == QUOTE_NONE)
              throw MakeError("single empty field record must be quoted");
          quoted = true;
      }

      if (quoted)
          field = _dialect.quotechar + field + _dialect.quotechar;

      _rec.Add(field);
      _num_fields++;
  }
  1042. }
  1043. public static PythonType Error;
  /// <summary>
  /// Creates a throwable of the module's <c>Error</c> type, passing
  /// <paramref name="args"/> through unchanged as its arguments.
  /// </summary>
  internal static Exception MakeError(params object[] args)
  {
      Exception error = PythonOps.CreateThrowable(Error, args);
      return error;
  }
  /// <summary>
  /// Ensures the module's exception type exists and stores it in <c>Error</c>.
  /// </summary>
  /// <param name="context">Python context that owns the exception type.</param>
  /// <param name="dict">Module dictionary passed to the context.</param>
  private static void InitModuleExceptions(PythonContext context,
      PythonDictionary dict)
  {
      PythonType errorType = context.EnsureModuleException(
          "csv.Error",
          PythonExceptions.StandardError,
          dict,
          "Error",
          "_csv");
      Error = errorType;
  }
  1054. }
  1055. #if CLR2
  /// <summary>
  /// Supplies <c>StringBuilder.Clear</c> for runtimes that lack it.
  /// </summary>
  static class StringBuilderExtensions
  {
      /// <summary>
      /// Removes all characters from <paramref name="sb"/> and returns the
      /// same instance.
      /// </summary>
      internal static StringBuilder Clear(this StringBuilder sb)
      {
          sb.Remove(0, sb.Length);
          return sb;
      }
  }
  1064. #endif
  1065. }