PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/google-closure-library/closure/goog/labs/format/csv.js

https://github.com/chromium/chromium
JavaScript | 417 lines | 229 code | 59 blank | 129 comment | 58 complexity | a99160f5e8b4a4c38c4cb286865130a9 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, Apache-2.0, BSD-3-Clause
  1. /**
  2. * @license
  3. * Copyright The Closure Library Authors.
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. /**
  7. * @fileoverview Provides a parser that turns a string of well-formed CSV data
  8. * into an array of objects or an array of arrays. All values are returned as
  9. * strings; the user has to convert data into numbers or Dates as required.
  10. * Empty fields (adjacent commas) are returned as empty strings.
  11. *
  12. * This parser uses http://tools.ietf.org/html/rfc4180 as the definition of CSV.
  13. */
  14. // TODO(user): We're trying to migrate all ES5 subclasses of Closure
  15. // Library to ES6. In ES6 this cannot be referenced before super is called. This
  16. // file has at least one this before a super call (in ES5) and cannot be
  17. // automatically upgraded to ES6 as a result. Please fix this if you have a
  18. // chance. Note: This can sometimes be caused by not calling the super
  19. // constructor at all. You can run the conversion tool yourself to see what it
  20. // does on this file: blaze run //javascript/refactoring/es6_classes:convert.
  21. goog.provide('goog.labs.format.csv');
  22. goog.provide('goog.labs.format.csv.ParseError');
  23. goog.provide('goog.labs.format.csv.Token');
  24. goog.require('goog.asserts');
  25. goog.require('goog.debug.Error');
  26. goog.require('goog.object');
  27. goog.require('goog.string');
  28. goog.require('goog.string.newlines');
  /**
   * @define {boolean} Whether parse errors carry verbose debugging details (a
   * line/column position and an excerpt of the offending line). Defaults to
   * goog.DEBUG. It is a define so that it can still be turned on for a
   * production build after compilation; when off, the debug information is
   * stripped to keep the final code small.
   */
  goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING = goog.DEBUG;
  /**
   * Error raised when the CSV text cannot be parsed.
   *
   * When goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING is on, the error message
   * is the supplied description followed by the 1-based line and column of the
   * failure and an excerpt of the offending line with a caret under the column,
   * and `position` records that line/column. When it is off, no message is
   * passed to the base constructor and `position` stays null.
   *
   * @param {string} text The CSV source text being parsed.
   * @param {number} index The index, in the string, of the position of the
   *     error.
   * @param {string=} opt_message A description of the violated parse
   *     expectation.
   * @constructor
   * @extends {goog.debug.Error}
   * @final
   */
  goog.labs.format.csv.ParseError = function(text, index, opt_message) {
    'use strict';

    /**
     * @type {?{line: number, column: number}} The 1-based line and column of
     * the parse error, or null when it was not computed.
     */
    this.position = null;

    // Deliberately left undefined unless verbose debugging is enabled.
    let description;

    if (goog.labs.format.csv.ENABLE_VERBOSE_DEBUGGING) {
      description = opt_message || '';

      const located = goog.labs.format.csv.ParseError.findLineInfo_(text, index);
      if (located) {
        // Both values are reported 1-based.
        const lineNo = located.lineIndex + 1;
        const columnNo = index - located.line.startLineIndex + 1;
        this.position = {line: lineNo, column: columnNo};

        const where =
            goog.string.subs(' at line %s column %s', lineNo, columnNo);
        const excerpt = goog.labs.format.csv.ParseError.getLineDebugString_(
            located.line.getContent(), columnNo);
        description = description + where + '\n' + excerpt;
      }
    }

    goog.labs.format.csv.ParseError.base(this, 'constructor', description);
  };
  goog.inherits(goog.labs.format.csv.ParseError, goog.debug.Error);

  /** @inheritDoc */
  goog.labs.format.csv.ParseError.prototype.name = 'ParseError';
  /**
   * Finds the line of a string that contains a given character index.
   *
   * The string is split with goog.string.newlines.getLines, and the first line
   * whose half-open range [startLineIndex, endLineIndex) contains `index` is
   * reported.
   *
   * TODO(nnaze): Consider moving to goog.string.newlines.
   * @param {string} str A string.
   * @param {number} index An index into the string.
   * @return {?{line: !goog.string.newlines.Line, lineIndex: number}} The
   *     matching line and its 0-based position among the lines, or null when
   *     no line contains `index`.
   * @private
   */
  goog.labs.format.csv.ParseError.findLineInfo_ = function(str, index) {
    'use strict';
    const lines = goog.string.newlines.getLines(str);
    const lineIndex = lines.findIndex(function(line) {
      'use strict';
      return line.startLineIndex <= index && line.endLineIndex > index;
    });

    // findIndex signals "no match" with -1, which is still a number, so the
    // result has to be range-checked rather than type-checked. Without this
    // the function would hand back {line: undefined, lineIndex: -1}.
    if (lineIndex < 0) {
      return null;
    }

    return {line: lines[lineIndex], lineIndex: lineIndex};
  };
  /**
   * Builds a two-line debug string: the given line, and beneath it a caret
   * positioned under the requested column.
   * @param {string} str The line of text to show.
   * @param {number} column The column to point at (1-indexed).
   * @return {string} The line, a newline, then `column - 1` spaces and a '^'.
   * @private
   */
  goog.labs.format.csv.ParseError.getLineDebugString_ = function(str, column) {
    'use strict';
    // Pad so the caret lands directly under the 1-indexed column.
    const caretLine = goog.string.repeat(' ', column - 1) + '^';
    return str + '\n' + caretLine;
  };
  /**
   * A unit of parser input: either a string holding exactly one character, or
   * one of the goog.labs.format.csv.Sentinels_ marker objects.
   * @typedef {string|!goog.labs.format.csv.Sentinels_}
   */
  goog.labs.format.csv.Token;
  /**
   * Parses a CSV string to create a two-dimensional array: one inner array per
   * record, one string per field.
   *
   * This function does not process header lines, etc -- such transformations
   * can be made on the resulting array.
   *
   * Fields may be wrapped in double quotes, inside which the delimiter and
   * line breaks are taken literally and a doubled quote ("") stands for one
   * quote character. A record ends at "\n", "\r\n" or a lone "\r". A delimiter
   * directly followed by a line break or the end of the text yields a trailing
   * empty field.
   *
   * @param {string} text The entire CSV text to be parsed.
   * @param {boolean=} opt_ignoreErrors Whether to ignore parsing errors and
   *     instead try to recover and keep going.
   * @param {string=} opt_delimiter The delimiter to use. Defaults to ','. Must
   *     be a single character other than '\r' or '\n' (checked with
   *     goog.asserts).
   * @return {!Array<!Array<string>>} The parsed CSV.
   * @throws {goog.labs.format.csv.ParseError} Unless opt_ignoreErrors is set:
   *     when a closing quote is followed by anything other than a quote, the
   *     delimiter, a line break or the end of the text; when a quoted field is
   *     never closed; or when a quote appears inside an unquoted field.
   */
  goog.labs.format.csv.parse = function(text, opt_ignoreErrors, opt_delimiter) {
    'use strict';

    let index = 0;  // Offset in `text` of the next character to be read.

    const delimiter = opt_delimiter || ',';
    goog.asserts.assert(
        delimiter.length === 1, 'Delimiter must be a single character.');
    goog.asserts.assert(
        delimiter !== '\r' && delimiter !== '\n',
        'Cannot use newline or carriage return as delimiter.');

    // Sentinels are unique objects and are always compared by identity
    // (===/!==). Loose equality would coerce the object to the string
    // "[object Object]" and so mistake a field with that exact text for a
    // sentinel.
    const EOF = goog.labs.format.csv.Sentinels_.EOF;
    const EOR = goog.labs.format.csv.Sentinels_.EOR;
    const NEWLINE = goog.labs.format.csv.Sentinels_.NEWLINE;  // \n, \r\n or \r
    const EMPTY = goog.labs.format.csv.Sentinels_.EMPTY;

    let pushBackToken = null;  // A single-token pushback.
    let sawComma = false;      // Special case for terminal delimiter.

    /**
     * Push a single token into the push-back variable, so that the next call
     * to nextToken() returns it. Only one token can be pending at a time.
     * @param {goog.labs.format.csv.Token} t Single token.
     */
    function pushBack(t) {
      goog.labs.format.csv.assertToken_(t);
      goog.asserts.assert(pushBackToken === null);
      pushBackToken = t;
    }

    /**
     * @return {goog.labs.format.csv.Token} The next token in the stream: the
     *     pushed-back token if there is one, EOF past the end of the text,
     *     NEWLINE for a line break, or else the next character.
     */
    function nextToken() {
      // Give the push back token if present.
      if (pushBackToken !== null) {
        const pending = pushBackToken;
        pushBackToken = null;
        return pending;
      }

      // We're done. EOF.
      if (index >= text.length) {
        return EOF;
      }

      // Give the next character.
      const chr = text.charAt(index++);
      goog.labs.format.csv.assertToken_(chr);

      // Line breaks are reported as the NEWLINE sentinel.
      if (chr === '\n') {
        return NEWLINE;
      }
      if (chr === '\r') {
        // A '\r\n' pair is a single token: also step over the '\n'.
        if (index < text.length && text.charAt(index) === '\n') {
          index++;
        }
        return NEWLINE;
      }

      return chr;
    }

    /**
     * Read a quoted field from input. The opening quote has already been
     * consumed by the caller.
     * @return {string} The field, as a string.
     */
    function readQuotedField() {
      const start = index;

      // Offset of the candidate closing quote, or null while none is known.
      let end = null;

      for (let token = nextToken(); token !== EOF; token = nextToken()) {
        if (token === '"') {
          end = index - 1;
          token = nextToken();

          // Two double quotes in a row. Keep scanning.
          if (token === '"') {
            end = null;
            continue;
          }

          // End of field. Break out.
          if (token === delimiter || token === EOF || token === NEWLINE) {
            if (token === NEWLINE) {
              // Leave the line break for readField() to end the record.
              pushBack(token);
            }
            if (token === delimiter) {
              sawComma = true;
            }
            break;
          }

          if (!opt_ignoreErrors) {
            throw new goog.labs.format.csv.ParseError(
                text, index - 1,
                'Unexpected character "' + token + '" after quote mark');
          } else {
            // Ignoring errors here means the field keeps going after the
            // closing quote, with the raw text kept verbatim (quotes
            // included): "ab"c,d yields the fields "ab"c and d.
            // Note: the rest is guaranteed not to start with ", as that case
            // is eliminated above.
            const prefix = '"' + text.substring(start, index);
            const suffix = readField();
            if (suffix === EOR) {
              // The record ended right here; re-signal that to the caller.
              pushBack(NEWLINE);
              return prefix;
            } else {
              return prefix + suffix;
            }
          }
        }
      }

      if (end === null) {
        if (!opt_ignoreErrors) {
          throw new goog.labs.format.csv.ParseError(
              text, text.length - 1, 'Unexpected end of text after open quote');
        } else {
          // Recover by treating the rest of the text as the field.
          end = text.length;
        }
      }

      // Take substring, combine double quotes.
      return text.substring(start, end).replace(/""/g, '"');
    }

    /**
     * Read a field from input.
     * @return {string|!goog.labs.format.csv.Sentinels_} The field, as a string,
     *     or the EOR sentinel when the current record has no more fields.
     */
    function readField() {
      const start = index;

      const didSeeComma = sawComma;
      sawComma = false;

      let token = nextToken();
      if (token === EMPTY) {
        // Marker left behind by the trailing-empty-field case below.
        return EOR;
      }
      if (token === EOF || token === NEWLINE) {
        if (didSeeComma) {
          // A delimiter directly before the end of the line or text means
          // there is one more, empty, field. EMPTY ends the record next call.
          pushBack(EMPTY);
          return '';
        }
        return EOR;
      }

      // This is the beginning of a quoted field.
      if (token === '"') {
        return readQuotedField();
      }

      while (true) {
        // This is the end of line or file.
        if (token === EOF || token === NEWLINE) {
          pushBack(token);
          break;
        }

        // This is the end of the field; more fields follow in this record.
        if (token === delimiter) {
          sawComma = true;
          break;
        }

        if (token === '"' && !opt_ignoreErrors) {
          throw new goog.labs.format.csv.ParseError(
              text, index - 1, 'Unexpected quote mark');
        }

        token = nextToken();
      }

      const returnString = (token === EOF) ?
          text.substring(start) :  // Return to end of file.
          text.substring(start, index - 1);

      // A '\r\n' break advances index by two, so a '\r' can be left at the
      // end of the slice; squash any CR/LF characters.
      return returnString.replace(/[\r\n]+/g, '');
    }

    /**
     * Read the next record.
     * @return {!Array<string>|!goog.labs.format.csv.Sentinels_} A single record
     *     with multiple fields, or the EOF sentinel once the text is used up.
     */
    function readRecord() {
      if (index >= text.length) {
        return EOF;
      }
      const record = [];
      for (let field = readField(); field !== EOR; field = readField()) {
        record.push(field);
      }
      return record;
    }

    // Read all records and return.
    const records = [];
    for (let record = readRecord(); record !== EOF; record = readRecord()) {
      records.push(record);
    }
    return records;
  };
  /**
   * Marker objects the parser uses alongside single-character tokens and
   * field strings. Each value is its own empty object literal.
   * @enum {!Object}
   * @private
   */
  goog.labs.format.csv.Sentinels_ = {
    /** Stands for an empty field. */
    EMPTY: {},

    /** Stands for the end of the file. */
    EOF: {},

    /** Stands for the end of a record. */
    EOR: {},

    /** Stands for a newline. */
    NEWLINE: {}
  };
  /**
   * Tests whether a value is a string holding exactly one character.
   * @param {string} str A string.
   * @return {boolean} Whether the string is a single character.
   * @private
   */
  goog.labs.format.csv.isCharacterString_ = function(str) {
    'use strict';
    // Anything that is not a string can never qualify.
    if (typeof str !== 'string') {
      return false;
    }
    return str.length === 1;
  };
  /**
   * Asserts that a value is a token: a single-character string or one of the
   * values of goog.labs.format.csv.Sentinels_.
   * @param {*} o What should be a token.
   * @throws {goog.asserts.AssertionError} If {@code o} is not a token.
   * @private
   */
  goog.labs.format.csv.assertToken_ = function(o) {
    'use strict';
    const complaint = 'Should be a string of length 1 or a sentinel.';

    // Non-strings are only acceptable when they are one of the sentinels.
    if (typeof o !== 'string') {
      goog.asserts.assert(
          goog.object.containsValue(goog.labs.format.csv.Sentinels_, o),
          complaint);
      return;
    }

    goog.asserts.assertString(o);
    goog.asserts.assert(goog.labs.format.csv.isCharacterString_(o), complaint);
  };
  352. };