
/pandas/io/parsers.py

http://github.com/pydata/pandas
Possible License(s): BSD-3-Clause, Apache-2.0


  1. """
  2. Module contains tools for processing files into DataFrames or other objects
  3. """
  4. from collections import abc, defaultdict
  5. import csv
  6. import datetime
  7. from io import StringIO, TextIOWrapper
  8. import itertools
  9. import re
  10. import sys
  11. from textwrap import fill
  12. from typing import Any, Dict, Iterable, List, Set
  13. import warnings
  14. import numpy as np
  15. import pandas._libs.lib as lib
  16. import pandas._libs.ops as libops
  17. import pandas._libs.parsers as parsers
  18. from pandas._libs.parsers import STR_NA_VALUES
  19. from pandas._libs.tslibs import parsing
  20. from pandas._typing import FilePathOrBuffer
  21. from pandas.errors import (
  22. AbstractMethodError,
  23. EmptyDataError,
  24. ParserError,
  25. ParserWarning,
  26. )
  27. from pandas.util._decorators import Appender
  28. from pandas.core.dtypes.cast import astype_nansafe
  29. from pandas.core.dtypes.common import (
  30. ensure_object,
  31. ensure_str,
  32. is_bool_dtype,
  33. is_categorical_dtype,
  34. is_dict_like,
  35. is_dtype_equal,
  36. is_extension_array_dtype,
  37. is_file_like,
  38. is_float,
  39. is_integer,
  40. is_integer_dtype,
  41. is_list_like,
  42. is_object_dtype,
  43. is_scalar,
  44. is_string_dtype,
  45. pandas_dtype,
  46. )
  47. from pandas.core.dtypes.dtypes import CategoricalDtype
  48. from pandas.core.dtypes.missing import isna
  49. from pandas.core import algorithms
  50. from pandas.core.arrays import Categorical
  51. from pandas.core.frame import DataFrame
  52. from pandas.core.indexes.api import (
  53. Index,
  54. MultiIndex,
  55. RangeIndex,
  56. ensure_index_from_sequences,
  57. )
  58. from pandas.core.series import Series
  59. from pandas.core.tools import datetimes as tools
  60. from pandas.io.common import (
  61. get_filepath_or_buffer,
  62. get_handle,
  63. infer_compression,
  64. validate_header_arg,
  65. )
  66. from pandas.io.date_converters import generic_parser
  67. # BOM character (byte order mark)
  68. # This exists at the beginning of a file to indicate endianness
  69. # of a file (stream). Unfortunately, this marker screws up parsing,
  70. # so we need to remove it if we see it.
  71. _BOM = "\ufeff"
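
# Illustrative sketch of why the BOM matters for parsing: when a UTF-8 file
# starts with a BOM, the marker is glued onto the first field of the first
# row, so header matching fails until it is stripped.
#
#   >>> header = _BOM + "a,b,c"
#   >>> header.split(",")[0] == "a"
#   False
#   >>> header.lstrip(_BOM).split(",")[0] == "a"
#   True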

_doc_read_csv_and_table = (
    r"""
{summary}

Also supports optionally iterating or breaking of the file
into chunks.

Additional help can be found in the online docs for
`IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

Parameters
----------
filepath_or_buffer : str, path object or file-like object
    Any valid string path is acceptable. The string could be a URL. Valid
    URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
    expected. A local file could be: file://localhost/path/to/table.csv.

    If you want to pass in a path object, pandas accepts any ``os.PathLike``.

    By file-like object, we refer to objects with a ``read()`` method, such as
    a file handler (e.g. via builtin ``open`` function) or ``StringIO``.
sep : str, default {_default_sep}
    Delimiter to use. If sep is None, the C engine cannot automatically detect
    the separator, but the Python parsing engine can, meaning the latter will
    be used, and the separator will be detected automatically by Python's
    builtin sniffer tool, ``csv.Sniffer``. In addition, separators longer than
    1 character and different from ``'\s+'`` will be interpreted as regular
    expressions and will also force the use of the Python parsing engine. Note
    that regex delimiters are prone to ignoring quoted data. Regex example:
    ``'\r\t'``.
delimiter : str, default ``None``
    Alias for sep.
header : int, list of int, default 'infer'
    Row number(s) to use as the column names, and the start of the
    data. Default behavior is to infer the column names: if no names
    are passed the behavior is identical to ``header=0`` and column
    names are inferred from the first line of the file, if column
    names are passed explicitly then the behavior is identical to
    ``header=None``. Explicitly pass ``header=0`` to be able to
    replace existing names. The header can be a list of integers that
    specify row locations for a multi-index on the columns
    e.g. [0,1,3]. Intervening rows that are not specified will be
    skipped (e.g. 2 in this example is skipped). Note that this
    parameter ignores commented lines and empty lines if
    ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
    data rather than the first line of the file.
names : array-like, optional
    List of column names to use. If the file contains a header row,
    then you should explicitly pass ``header=0`` to override the column names.
    Duplicates in this list are not allowed.
index_col : int, str, sequence of int / str, or False, default ``None``
    Column(s) to use as the row labels of the ``DataFrame``, either given as
    string name or column index. If a sequence of int / str is given, a
    MultiIndex is used.

    Note: ``index_col=False`` can be used to force pandas to *not* use the first
    column as the index, e.g. when you have a malformed file with delimiters at
    the end of each line.
usecols : list-like or callable, optional
    Return a subset of the columns. If list-like, all elements must either
    be positional (i.e. integer indices into the document columns) or strings
    that correspond to column names provided either by the user in `names` or
    inferred from the document header row(s). For example, a valid list-like
    `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
    Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
    To instantiate a DataFrame from ``data`` with element order preserved use
    ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
    in ``['foo', 'bar']`` order or
    ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
    for ``['bar', 'foo']`` order.

    If callable, the callable function will be evaluated against the column
    names, returning names where the callable function evaluates to True. An
    example of a valid callable argument would be ``lambda x: x.upper() in
    ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
    parsing time and lower memory usage.
squeeze : bool, default False
    If the parsed data only contains one column then return a Series.
prefix : str, optional
    Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
mangle_dupe_cols : bool, default True
    Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
    'X'...'X'. Passing in False will cause data to be overwritten if there
    are duplicate names in the columns.
dtype : Type name or dict of column -> type, optional
    Data type for data or columns. E.g. {{'a': np.float64, 'b': np.int32,
    'c': 'Int64'}}
    Use `str` or `object` together with suitable `na_values` settings
    to preserve and not interpret dtype.
    If converters are specified, they will be applied INSTEAD
    of dtype conversion.
engine : {{'c', 'python'}}, optional
    Parser engine to use. The C engine is faster while the python engine is
    currently more feature-complete.
converters : dict, optional
    Dict of functions for converting values in certain columns. Keys can either
    be integers or column labels.
true_values : list, optional
    Values to consider as True.
false_values : list, optional
    Values to consider as False.
skipinitialspace : bool, default False
    Skip spaces after delimiter.
skiprows : list-like, int or callable, optional
    Line numbers to skip (0-indexed) or number of lines to skip (int)
    at the start of the file.

    If callable, the callable function will be evaluated against the row
    indices, returning True if the row should be skipped and False otherwise.
    An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
skipfooter : int, default 0
    Number of lines at bottom of file to skip (Unsupported with engine='c').
nrows : int, optional
    Number of rows of file to read. Useful for reading pieces of large files.
na_values : scalar, str, list-like, or dict, optional
    Additional strings to recognize as NA/NaN. If dict passed, specific
    per-column NA values. By default the following values are interpreted as
    NaN: '"""
    + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent="    ")
    + """'.
keep_default_na : bool, default True
    Whether or not to include the default NaN values when parsing the data.
    Depending on whether `na_values` is passed in, the behavior is as follows:

    * If `keep_default_na` is True, and `na_values` are specified, `na_values`
      is appended to the default NaN values used for parsing.
    * If `keep_default_na` is True, and `na_values` are not specified, only
      the default NaN values are used for parsing.
    * If `keep_default_na` is False, and `na_values` are specified, only
      the NaN values specified `na_values` are used for parsing.
    * If `keep_default_na` is False, and `na_values` are not specified, no
      strings will be parsed as NaN.

    Note that if `na_filter` is passed in as False, the `keep_default_na` and
    `na_values` parameters will be ignored.
na_filter : bool, default True
    Detect missing value markers (empty strings and the value of na_values). In
    data without any NAs, passing na_filter=False can improve the performance
    of reading a large file.
verbose : bool, default False
    Indicate number of NA values placed in non-numeric columns.
skip_blank_lines : bool, default True
    If True, skip over blank lines rather than interpreting as NaN values.
parse_dates : bool or list of int or names or list of lists or dict, \
default False
    The behavior is as follows:

    * boolean. If True -> try parsing the index.
    * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
      each as a separate date column.
    * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as
      a single date column.
    * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call
      result 'foo'

    If a column or index cannot be represented as an array of datetimes,
    say because of an unparseable value or a mixture of timezones, the column
    or index will be returned unaltered as an object data type. For
    non-standard datetime parsing, use ``pd.to_datetime`` after
    ``pd.read_csv``. To parse an index or column with a mixture of timezones,
    specify ``date_parser`` to be a partially-applied
    :func:`pandas.to_datetime` with ``utc=True``. See
    :ref:`io.csv.mixed_timezones` for more.

    Note: A fast-path exists for iso8601-formatted dates.
infer_datetime_format : bool, default False
    If True and `parse_dates` is enabled, pandas will attempt to infer the
    format of the datetime strings in the columns, and if it can be inferred,
    switch to a faster method of parsing them. In some cases this can increase
    the parsing speed by 5-10x.
keep_date_col : bool, default False
    If True and `parse_dates` specifies combining multiple columns then
    keep the original columns.
date_parser : function, optional
    Function to use for converting a sequence of string columns to an array of
    datetime instances. The default uses ``dateutil.parser.parser`` to do the
    conversion. Pandas will try to call `date_parser` in three different ways,
    advancing to the next if an exception occurs: 1) Pass one or more arrays
    (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
    string values from the columns defined by `parse_dates` into a single array
    and pass that; and 3) call `date_parser` once for each row using one or
    more strings (corresponding to the columns defined by `parse_dates`) as
    arguments.
dayfirst : bool, default False
    DD/MM format dates, international and European format.
cache_dates : bool, default True
    If True, use a cache of unique, converted dates to apply the datetime
    conversion. May produce significant speed-up when parsing duplicate
    date strings, especially ones with timezone offsets.

    .. versionadded:: 0.25.0
iterator : bool, default False
    Return TextFileReader object for iteration or getting chunks with
    ``get_chunk()``.
chunksize : int, optional
    Return TextFileReader object for iteration.
    See the `IO Tools docs
    <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
    for more information on ``iterator`` and ``chunksize``.
compression : {{'infer', 'gzip', 'bz2', 'zip', 'xz', None}}, default 'infer'
    For on-the-fly decompression of on-disk data. If 'infer' and
    `filepath_or_buffer` is path-like, then detect compression from the
    following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no
    decompression). If using 'zip', the ZIP file must contain only one data
    file to be read in. Set to None for no decompression.
thousands : str, optional
    Thousands separator.
decimal : str, default '.'
    Character to recognize as decimal point (e.g. use ',' for European data).
lineterminator : str (length 1), optional
    Character to break file into lines. Only valid with C parser.
quotechar : str (length 1), optional
    The character used to denote the start and end of a quoted item. Quoted
    items can include the delimiter and it will be ignored.
quoting : int or csv.QUOTE_* instance, default 0
    Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
    QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
doublequote : bool, default ``True``
    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
    whether or not to interpret two consecutive quotechar elements INSIDE a
    field as a single ``quotechar`` element.
escapechar : str (length 1), optional
    One-character string used to escape other characters.
comment : str, optional
    Indicates remainder of line should not be parsed. If found at the beginning
    of a line, the line will be ignored altogether. This parameter must be a
    single character. Like empty lines (as long as ``skip_blank_lines=True``),
    fully commented lines are ignored by the parameter `header` but not by
    `skiprows`. For example, if ``comment='#'``, parsing
    ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
    treated as the header.
encoding : str, optional
    Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
    standard encodings
    <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
dialect : str or csv.Dialect, optional
    If provided, this parameter will override values (default or not) for the
    following parameters: `delimiter`, `doublequote`, `escapechar`,
    `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
    override values, a ParserWarning will be issued. See csv.Dialect
    documentation for more details.
error_bad_lines : bool, default True
    Lines with too many fields (e.g. a csv line with too many commas) will by
    default cause an exception to be raised, and no DataFrame will be returned.
    If False, then these "bad lines" will be dropped from the DataFrame that is
    returned.
warn_bad_lines : bool, default True
    If error_bad_lines is False, and warn_bad_lines is True, a warning for each
    "bad line" will be output.
delim_whitespace : bool, default False
    Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
    used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
    is set to True, nothing should be passed in for the ``delimiter``
    parameter.
low_memory : bool, default True
    Internally process the file in chunks, resulting in lower memory use
    while parsing, but possibly mixed type inference. To ensure no mixed
    types either set False, or specify the type with the `dtype` parameter.
    Note that the entire file is read into a single DataFrame regardless,
    use the `chunksize` or `iterator` parameter to return the data in chunks.
    (Only valid with C parser).
memory_map : bool, default False
    If a filepath is provided for `filepath_or_buffer`, map the file object
    directly onto memory and access the data directly from there. Using this
    option can improve performance because there is no longer any I/O overhead.
float_precision : str, optional
    Specifies which converter the C engine should use for floating-point
    values. The options are `None` for the ordinary converter,
    `high` for the high-precision converter, and `round_trip` for the
    round-trip converter.

Returns
-------
DataFrame or TextParser
    A comma-separated values (csv) file is returned as a two-dimensional
    data structure with labeled axes.

See Also
--------
DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
read_csv : Read a comma-separated values (csv) file into DataFrame.
read_fwf : Read a table of fixed-width formatted lines into DataFrame.

Examples
--------
>>> pd.{func_name}('data.csv')  # doctest: +SKIP
"""
)


def _validate_integer(name, val, min_val=0):
    """
    Checks whether the 'name' parameter for parsing is either
    an integer OR float that can SAFELY be cast to an integer
    without losing accuracy. Raises a ValueError if that is
    not the case.

    Parameters
    ----------
    name : string
        Parameter name (used for error reporting)
    val : int or float
        The value to check
    min_val : int
        Minimum allowed value (val < min_val will result in a ValueError)
    """
    msg = f"'{name:s}' must be an integer >={min_val:d}"

    if val is not None:
        if is_float(val):
            if int(val) != val:
                raise ValueError(msg)
            val = int(val)
        elif not (is_integer(val) and val >= min_val):
            raise ValueError(msg)

    return val
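
# Illustrative behavior (doctest-style): integral floats are accepted and
# cast, while non-integral values are rejected with the message built above.
#
#   >>> _validate_integer("nrows", 3.0)
#   3
#   >>> _validate_integer("nrows", 3.5)
#   Traceback (most recent call last):
#       ...
#   ValueError: 'nrows' must be an integer >=0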


def _validate_names(names):
    """
    Raise ValueError if the `names` parameter contains duplicates.

    Parameters
    ----------
    names : array-like or None
        An array containing a list of the names used for the output DataFrame.

    Raises
    ------
    ValueError
        If names are not unique.
    """
    if names is not None:
        if len(names) != len(set(names)):
            raise ValueError("Duplicate names are not allowed.")


def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
    """Generic reader of line files."""
    encoding = kwds.get("encoding", None)
    if encoding is not None:
        encoding = re.sub("_", "-", encoding).lower()
        kwds["encoding"] = encoding

    compression = kwds.get("compression", "infer")
    compression = infer_compression(filepath_or_buffer, compression)

    # TODO: get_filepath_or_buffer could return
    # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
    # though mypy handling of conditional imports is difficult.
    # See https://github.com/python/mypy/issues/1297
    fp_or_buf, _, compression, should_close = get_filepath_or_buffer(
        filepath_or_buffer, encoding, compression
    )
    kwds["compression"] = compression

    if kwds.get("date_parser", None) is not None:
        if isinstance(kwds["parse_dates"], bool):
            kwds["parse_dates"] = True

    # Extract some of the arguments (pass chunksize on).
    iterator = kwds.get("iterator", False)
    chunksize = _validate_integer("chunksize", kwds.get("chunksize", None), 1)
    nrows = kwds.get("nrows", None)

    # Check for duplicates in names.
    _validate_names(kwds.get("names", None))

    # Create the parser.
    parser = TextFileReader(fp_or_buf, **kwds)

    if chunksize or iterator:
        return parser

    try:
        data = parser.read(nrows)
    finally:
        parser.close()

    if should_close:
        try:
            fp_or_buf.close()
        except ValueError:
            pass

    return data


_parser_defaults = {
    "delimiter": None,
    "escapechar": None,
    "quotechar": '"',
    "quoting": csv.QUOTE_MINIMAL,
    "doublequote": True,
    "skipinitialspace": False,
    "lineterminator": None,
    "header": "infer",
    "index_col": None,
    "names": None,
    "prefix": None,
    "skiprows": None,
    "skipfooter": 0,
    "nrows": None,
    "na_values": None,
    "keep_default_na": True,
    "true_values": None,
    "false_values": None,
    "converters": None,
    "dtype": None,
    "cache_dates": True,
    "thousands": None,
    "comment": None,
    "decimal": ".",
    # 'engine': 'c',
    "parse_dates": False,
    "keep_date_col": False,
    "dayfirst": False,
    "date_parser": None,
    "usecols": None,
    # 'iterator': False,
    "chunksize": None,
    "verbose": False,
    "encoding": None,
    "squeeze": False,
    "compression": None,
    "mangle_dupe_cols": True,
    "infer_datetime_format": False,
    "skip_blank_lines": True,
}


_c_parser_defaults = {
    "delim_whitespace": False,
    "na_filter": True,
    "low_memory": True,
    "memory_map": False,
    "error_bad_lines": True,
    "warn_bad_lines": True,
    "float_precision": None,
}

_fwf_defaults = {"colspecs": "infer", "infer_nrows": 100, "widths": None}

_c_unsupported = {"skipfooter"}
_python_unsupported = {"low_memory", "float_precision"}

_deprecated_defaults: Dict[str, Any] = {}
_deprecated_args: Set[str] = set()


def _make_parser_function(name, default_sep=","):
    def parser_f(
        filepath_or_buffer: FilePathOrBuffer,
        sep=default_sep,
        delimiter=None,
        # Column and Index Locations and Names
        header="infer",
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        prefix=None,
        mangle_dupe_cols=True,
        # General Parsing Configuration
        dtype=None,
        engine=None,
        converters=None,
        true_values=None,
        false_values=None,
        skipinitialspace=False,
        skiprows=None,
        skipfooter=0,
        nrows=None,
        # NA and Missing Data Handling
        na_values=None,
        keep_default_na=True,
        na_filter=True,
        verbose=False,
        skip_blank_lines=True,
        # Datetime Handling
        parse_dates=False,
        infer_datetime_format=False,
        keep_date_col=False,
        date_parser=None,
        dayfirst=False,
        cache_dates=True,
        # Iteration
        iterator=False,
        chunksize=None,
        # Quoting, Compression, and File Format
        compression="infer",
        thousands=None,
        decimal: str = ".",
        lineterminator=None,
        quotechar='"',
        quoting=csv.QUOTE_MINIMAL,
        doublequote=True,
        escapechar=None,
        comment=None,
        encoding=None,
        dialect=None,
        # Error Handling
        error_bad_lines=True,
        warn_bad_lines=True,
        # Internal
        delim_whitespace=False,
        low_memory=_c_parser_defaults["low_memory"],
        memory_map=False,
        float_precision=None,
    ):
        # gh-23761
        #
        # When a dialect is passed, it overrides any of the overlapping
        # parameters passed in directly. We don't want to warn if the
        # default parameters were passed in (since it probably means
        # that the user didn't pass them in explicitly in the first place).
        #
        # "delimiter" is the annoying corner case because we alias it to
        # "sep" before doing comparison to the dialect values later on.
        # Thus, we need a flag to indicate that we need to "override"
        # the comparison to dialect values by checking if default values
        # for BOTH "delimiter" and "sep" were provided.
        if dialect is not None:
            sep_override = delimiter is None and sep == default_sep
            kwds = dict(sep_override=sep_override)
        else:
            kwds = dict()

        # Alias sep -> delimiter.
        if delimiter is None:
            delimiter = sep

        if delim_whitespace and delimiter != default_sep:
            raise ValueError(
                "Specified a delimiter with both sep and "
                "delim_whitespace=True; you can only specify one."
            )

        if engine is not None:
            engine_specified = True
        else:
            engine = "c"
            engine_specified = False

        kwds.update(
            delimiter=delimiter,
            engine=engine,
            dialect=dialect,
            compression=compression,
            engine_specified=engine_specified,
            doublequote=doublequote,
            escapechar=escapechar,
            quotechar=quotechar,
            quoting=quoting,
            skipinitialspace=skipinitialspace,
            lineterminator=lineterminator,
            header=header,
            index_col=index_col,
            names=names,
            prefix=prefix,
            skiprows=skiprows,
            skipfooter=skipfooter,
            na_values=na_values,
            true_values=true_values,
            false_values=false_values,
            keep_default_na=keep_default_na,
            thousands=thousands,
            comment=comment,
            decimal=decimal,
            parse_dates=parse_dates,
            keep_date_col=keep_date_col,
            dayfirst=dayfirst,
            date_parser=date_parser,
            cache_dates=cache_dates,
            nrows=nrows,
            iterator=iterator,
            chunksize=chunksize,
            converters=converters,
            dtype=dtype,
            usecols=usecols,
            verbose=verbose,
            encoding=encoding,
            squeeze=squeeze,
            memory_map=memory_map,
            float_precision=float_precision,
            na_filter=na_filter,
            delim_whitespace=delim_whitespace,
            warn_bad_lines=warn_bad_lines,
            error_bad_lines=error_bad_lines,
            low_memory=low_memory,
            mangle_dupe_cols=mangle_dupe_cols,
            infer_datetime_format=infer_datetime_format,
            skip_blank_lines=skip_blank_lines,
        )

        return _read(filepath_or_buffer, kwds)

    parser_f.__name__ = name

    return parser_f


read_csv = _make_parser_function("read_csv", default_sep=",")
read_csv = Appender(
    _doc_read_csv_and_table.format(
        func_name="read_csv",
        summary="Read a comma-separated values (csv) file into DataFrame.",
        _default_sep="','",
    )
)(read_csv)

read_table = _make_parser_function("read_table", default_sep="\t")
read_table = Appender(
    _doc_read_csv_and_table.format(
        func_name="read_table",
        summary="Read general delimited file into DataFrame.",
        _default_sep=r"'\\t' (tab-stop)",
    )
)(read_table)
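
# Illustrative usage of the generated readers (file names are hypothetical):
#
#   >>> import pandas as pd
#   >>> df = pd.read_csv("data.csv")                             # doctest: +SKIP
#   >>> df = pd.read_table("data.tsv")                           # doctest: +SKIP
#   >>> for chunk in pd.read_csv("big.csv", chunksize=10000):    # doctest: +SKIP
#   ...     print(chunk.shape)  # each chunk is a DataFrame of <= 10000 rows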


def read_fwf(
    filepath_or_buffer: FilePathOrBuffer,
    colspecs="infer",
    widths=None,
    infer_nrows=100,
    **kwds,
):
    r"""
    Read a table of fixed-width formatted lines into DataFrame.

    Also supports optionally iterating or breaking of the file
    into chunks.

    Additional help can be found in the `online docs for IO Tools
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.csv``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handler (e.g. via builtin ``open`` function)
        or ``StringIO``.
    colspecs : list of tuple (int, int) or 'infer', optional
        A list of tuples giving the extents of the fixed-width
        fields of each line as half-open intervals (i.e., [from, to[ ).
        String value 'infer' can be used to instruct the parser to try
        detecting the column specifications from the first 100 rows of
        the data which are not being skipped via skiprows (default='infer').
    widths : list of int, optional
        A list of field widths which can be used instead of 'colspecs' if
        the intervals are contiguous.
    infer_nrows : int, default 100
        The number of rows to consider when letting the parser determine the
        `colspecs`.

        .. versionadded:: 0.24.0
    **kwds : optional
        Optional keyword arguments can be passed to ``TextFileReader``.

    Returns
    -------
    DataFrame or TextParser
        A comma-separated values (csv) file is returned as a two-dimensional
        data structure with labeled axes.

    See Also
    --------
    DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
    read_csv : Read a comma-separated values (csv) file into DataFrame.

    Examples
    --------
    >>> pd.read_fwf('data.csv')  # doctest: +SKIP
    """
    # Check input arguments.
    if colspecs is None and widths is None:
        raise ValueError("Must specify either colspecs or widths")
    elif colspecs not in (None, "infer") and widths is not None:
        raise ValueError("You must specify only one of 'widths' and 'colspecs'")

    # Compute 'colspecs' from 'widths', if specified.
    if widths is not None:
        colspecs, col = [], 0
        for w in widths:
            colspecs.append((col, col + w))
            col += w

    kwds["colspecs"] = colspecs
    kwds["infer_nrows"] = infer_nrows
    kwds["engine"] = "python-fwf"
    return _read(filepath_or_buffer, kwds)
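
# Illustrative usage (hypothetical file): contiguous fields can be given as
# widths, which the loop above converts into half-open colspecs, so
# widths=[3, 5] is equivalent to colspecs=[(0, 3), (3, 8)].
#
#   >>> pd.read_fwf("data.txt", widths=[3, 5])               # doctest: +SKIP
#   >>> pd.read_fwf("data.txt", colspecs=[(0, 3), (3, 8)])   # doctest: +SKIP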


class TextFileReader(abc.Iterator):
    """
    Passed dialect overrides any of the related parser options.
    """

    def __init__(self, f, engine=None, **kwds):

        self.f = f

        if engine is not None:
            engine_specified = True
        else:
            engine = "python"
            engine_specified = False

        self._engine_specified = kwds.get("engine_specified", engine_specified)

        if kwds.get("dialect") is not None:
            dialect = kwds["dialect"]
            if dialect in csv.list_dialects():
                dialect = csv.get_dialect(dialect)

            # Any valid dialect should have these attributes.
            # If any are missing, we will raise automatically.
            for param in (
                "delimiter",
                "doublequote",
                "escapechar",
                "skipinitialspace",
                "quotechar",
                "quoting",
            ):
                try:
                    dialect_val = getattr(dialect, param)
                except AttributeError as err:
                    raise ValueError(
                        f"Invalid dialect {kwds['dialect']} provided"
                    ) from err
                parser_default = _parser_defaults[param]
                provided = kwds.get(param, parser_default)

                # Messages for conflicting values between the dialect
                # instance and the actual parameters provided.
                conflict_msgs = []

                # Don't warn if the default parameter was passed in,
                # even if it conflicts with the dialect (gh-23761).
                if provided != parser_default and provided != dialect_val:
                    msg = (
                        f"Conflicting values for '{param}': '{provided}' was "
                        f"provided, but the dialect specifies '{dialect_val}'. "
                        "Using the dialect-specified value."
                    )

                    # Annoying corner case for not warning about
                    # conflicts between dialect and delimiter parameter.
                    # Refer to the outer "_read_" function for more info.
                    if not (param == "delimiter" and kwds.pop("sep_override", False)):
                        conflict_msgs.append(msg)

                if conflict_msgs:
                    warnings.warn(
                        "\n\n".join(conflict_msgs), ParserWarning, stacklevel=2
                    )
                kwds[param] = dialect_val

        if kwds.get("skipfooter"):
            if kwds.get("iterator") or kwds.get("chunksize"):
                raise ValueError("'skipfooter' not supported for 'iteration'")
            if kwds.get("nrows"):
                raise ValueError("'skipfooter' not supported with 'nrows'")

        if kwds.get("header", "infer") == "infer":
            kwds["header"] = 0 if kwds.get("names") is None else None

        self.orig_options = kwds

        # miscellanea
        self.engine = engine
        self._engine = None
        self._currow = 0

        options = self._get_options_with_defaults(engine)

        self.chunksize = options.pop("chunksize", None)
        self.nrows = options.pop("nrows", None)
        self.squeeze = options.pop("squeeze", False)

        # might mutate self.engine
        self.engine = self._check_file_or_buffer(f, engine)
        self.options, self.engine = self._clean_options(options, engine)

        if "has_index_names" in kwds:
            self.options["has_index_names"] = kwds["has_index_names"]

        self._make_engine(self.engine)

    def close(self):
        self._engine.close()

    def _get_options_with_defaults(self, engine):
        kwds = self.orig_options

        options = {}

        for argname, default in _parser_defaults.items():
            value = kwds.get(argname, default)

            # see gh-12935
            if argname == "mangle_dupe_cols" and not value:
                raise ValueError("Setting mangle_dupe_cols=False is not supported yet")
            else:
                options[argname] = value

        for argname, default in _c_parser_defaults.items():
            if argname in kwds:
                value = kwds[argname]

                if engine != "c" and value != default:
                    if "python" in engine and argname not in _python_unsupported:
                        pass
                    elif value == _deprecated_defaults.get(argname, default):
                        pass
                    else:
                        raise ValueError(
                            f"The {repr(argname)} option is not supported with the "
                            f"{repr(engine)} engine"
                        )
            else:
                value = _deprecated_defaults.get(argname, default)
            options[argname] = value

        if engine == "python-fwf":
            for argname, default in _fwf_defaults.items():
                options[argname] = kwds.get(argname, default)

        return options

    def _check_file_or_buffer(self, f, engine):
        # see gh-16530
        if is_file_like(f):
            next_attr = "__next__"

            # The C engine doesn't need the file-like to have the "next" or
            # "__next__" attribute. However, the Python engine explicitly calls
            # "next(...)" when iterating through such an object, meaning it
            # needs to have that attribute ("next" for Python 2.x, "__next__"
            # for Python 3.x)
            if engine != "c" and not hasattr(f, next_attr):
                msg = "The 'python' engine cannot iterate through this file buffer."
                raise ValueError(msg)

        return engine

    def _clean_options(self, options, engine):
        result = options.copy()

        engine_specified = self._engine_specified
        fallback_reason = None

        sep = options["delimiter"]
        delim_whitespace = options["delim_whitespace"]

        # C engine not supported yet
        if engine == "c":
            if options["skipfooter"] > 0:
                fallback_reason = "the 'c' engine does not support skipfooter"
                engine = "python"

        encoding = sys.getfilesystemencoding() or "utf-8"
        if sep is None and not delim_whitespace:
            if engine == "c":
                fallback_reason = (
                    "the 'c' engine does not support "
                    "sep=None with delim_whitespace=False"
                )
                engine = "python"
        elif sep is not None and len(sep) > 1:
            if engine == "c" and sep == r"\s+":
                result["delim_whitespace"] = True
                del result["delimiter"]
            elif engine not in ("python", "python-fwf"):
                # wait until regex engine integrated
                fallback_reason = (
                    "the 'c' engine does not support "
                    "regex separators (separators > 1 char and "
                    r"different from '\s+' are interpreted as regex)"
                )
                engine = "python"
        elif delim_whitespace:
            if "python" in engine:
                result["delimiter"] = r"\s+"
        elif sep is not None:
            encodeable = True
            try:
                if len(sep.encode(encoding)) > 1:
                    encodeable = False
            except UnicodeDecodeError:
                encodeable = False
            if not encodeable and engine not in ("python", "python-fwf"):
                fallback_reason = (
                    f"the separator encoded in {encoding} "
                    "is > 1 char long, and the 'c' engine "
                    "does not support such separators"
                )
                engine = "python"

        quotechar = options["quotechar"]
        if quotechar is not None and isinstance(quotechar, (str, bytes)):
            if (
                len(quotechar) == 1
                and ord(quotechar) > 127
                and engine not in ("python", "python-fwf")
            ):
                fallback_reason = (
                    "ord(quotechar) > 127, meaning the "
                    "quotechar is larger than one byte, "
                    "and the 'c' engine does not support such quotechars"
                )
                engine = "python"

        if fallback_reason and engine_specified:
            raise ValueError(fallback_reason)

        if engine == "c":
            for arg in _c_unsupported:
                del result[arg]

        if "python" in engine:
            for arg in _python_unsupported:
                if fallback_reason and result[arg] != _c_parser_defaults[arg]:
                    raise ValueError(
                        "Falling back to the 'python' engine because "
                        f"{fallback_reason}, but this causes {repr(arg)} to be "
                        "ignored as it is not supported by the 'python' engine."
                    )
                del result[arg]

        if fallback_reason:
            warnings.warn(
                (
                    "Falling back to the 'python' engine because "
                    f"{fallback_reason}; you can avoid this warning by specifying "
                    "engine='python'."
                ),
                ParserWarning,
                stacklevel=5,
            )

        index_col = options["index_col"]
        names = options["names"]
        converters = options["converters"]
        na_values = options["na_values"]
        skiprows = options["skiprows"]

        validate_header_arg(options["header"])

        depr_warning = ""

        for arg in _deprecated_args:
            parser_default = _c_parser_defaults[arg]
            depr_default = _deprecated_defaults[arg]

            msg = (
                f"The {repr(arg)} argument has been deprecated and will be "
                "removed in a future version."
            )

            if result.get(arg, depr_default) != depr_default:
                depr_warning += msg + "\n\n"
            else:
                result[arg] = parser_default

        if depr_warning != "":
            warnings.warn(depr_warning, FutureWarning, stacklevel=2)

        if index_col is True:
            raise ValueError("The value of index_col couldn't be 'True'")
        if _is_index_col(index_col):
            if not isinstance(index_col, (list, tuple, np.ndarray)):
                index_col = [index_col]
        result["index_col"] = index_col

        names = list(names) if names is not None else names

        # type conversion-related
        if converters is not None:
            if not isinstance(converters, dict):
                raise TypeError(
                    "Type converters must be a dict or subclass, "
                    f"input was a {type(converters).__name__}"
                )
        else:
            converters = {}

        # Converting values to NA
        keep_default_na = options["keep_default_na"]
        na_values, na_fvalues = _clean_na_values(na_values, keep_default_na)

        # handle skiprows; this is internally handled by the
        # c-engine, so only need for python parsers
        if engine != "c":
            if is_integer(skiprows):
                skiprows = list(range(skiprows))
            if skiprows is None:
                skiprows = set()
            elif not callable(skiprows):
                skiprows = set(skiprows)

        # put stuff back
        result["names"] = names
        result["converters"] = converters
        result["na_values"] = na_values
        result["na_fvalues"] = na_fvalues
        result["skiprows"] = skiprows

        return result, engine

    def __next__(self):
        try:
            return self.get_chunk()
        except StopIteration:
            self.close()
            raise

    def _make_engine(self, engine="c"):
        if engine == "c":
            self._engine = CParserWrapper(self.f, **self.options)
        else:
            if engine == "python":
                klass = PythonParser
            elif engine == "python-fwf":
                klass = FixedWidthFieldParser
            else:
                raise ValueError(
                    f"Unknown engine: {engine} (valid options "
                    'are "c", "python", or "python-fwf")'
                )
            self._engine = klass(self.f, **self.options)

    def _failover_to_python(self):
        raise AbstractMethodError(self)

    def read(self, nrows=None):
        nrows = _validate_integer("nrows", nrows)
        ret = self._engine.read(nrows)

        # May alter columns / col_dict
        index, columns, col_dict = self._create_index(ret)

        if index is None:
            if col_dict:
                # Any column is actually fine:
                new_rows = len(next(iter(col_dict.values())))
                index = RangeIndex(self._currow, self._currow + new_rows)
            else:
                new_rows = 0
        else:
            new_rows = len(index)

        df = DataFrame(col_dict, columns=columns, index=index)

        self._currow += new_rows

        if self.squeeze and len(df.columns) == 1:
            return df[df.columns[0]].copy()
        return df

    def _create_index(self, ret):
        index, columns, col_dict = ret
        return index, columns, col_dict

    def get_chunk(self, size=None):
        if size is None:
            size = self.chunksize
        if self.nrows is not None:
            if self._currow >= self.nrows:
                raise StopIteration
            size = min(size, self.nrows - self._currow)
        return self.read(nrows=size)
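
# Illustrative usage: read_csv returns a TextFileReader instead of a DataFrame
# when iterator=True or chunksize is set (assumes pandas imported as pd; the
# file name is hypothetical):
#
#   >>> reader = pd.read_csv("big.csv", iterator=True)  # doctest: +SKIP
#   >>> first_rows = reader.get_chunk(100)              # doctest: +SKIP
#   >>> reader.close()                                  # doctest: +SKIP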


def _is_index_col(col):
    return col is not None and col is not False


def _is_potential_multi_index(columns):
    """
    Check whether or not the `columns` parameter
    could be converted into a MultiIndex.

    Parameters
    ----------
    columns : array-like
        Object which may or may not be convertible into a MultiIndex

    Returns
    -------
    boolean : Whether or not columns could become a MultiIndex
    """
    return (
        len(columns)
        and not isinstance(columns, MultiIndex)
        and all(isinstance(c, tuple) for c in columns)
    )
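
# For example, all-tuple columns are MultiIndex candidates; plain labels
# are not:
#
#   >>> _is_potential_multi_index([("a", "x"), ("a", "y")])
#   True
#   >>> _is_potential_multi_index(["a", "b"])
#   False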


def _evaluate_usecols(usecols, names):
    """
    Check whether or not the 'usecols' parameter
    is a callable. If so, enumerates the 'names'
    parameter and returns a set of indices for
    each entry in 'names' that evaluates to True.
    If not a callable, returns 'usecols'.
    """
    if callable(usecols):
        return {i for i, name in enumerate(names) if usecols(name)}
    return usecols
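
# For example, a callable is resolved to the positional indices it accepts,
# while a list-like is returned unchanged:
#
#   >>> _evaluate_usecols(lambda name: name.startswith("a"), ["ab", "bb", "ac"])
#   {0, 2}
#   >>> _evaluate_usecols([0, 2], ["ab", "bb", "ac"])
#   [0, 2]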


def _validate_usecols_names(usecols, names):
    """
    Validates that all usecols are present in a given
    list of names. If not, raise a ValueError that
    shows what usecols are missing.

    Parameters
    ----------
    usecols : iterable of usecols
        The columns to validate are present in names.
    names : iterable of names
        The column names to check against.

    Returns
    -------
    usecols : iterable of usecols
        The `usecols` parameter if the validation succeeds.

    Raises
    ------
    ValueError : Columns were missing. Error message will list them.
    """
    missing = [c for c in usecols if c not in names]
    if len(missing) > 0:
        raise ValueError(
            f"Usecols do not match columns, columns expected but not found: {missing}"
        )

    return usecols


def _validate_skipfooter_arg(skipfooter):
    """
    Validate the 'skipfooter' parameter.

    Checks whether 'skipfooter' is a non-negative integer.
    Raises a ValueError if that is not the case.

    Parameters
    ----------
    skipfooter : non-negative integer
        The number of rows to skip at the end of the file.

    Returns
    -------
    validated_skipfooter : non-negative integer
        The original input if the validation succeeds.

    Raises
    ------
    ValueError : 'skipfooter' was not a non-negative integer.
    """
    if not is_integer(skipfooter):
        raise ValueError("skipfooter must be an integer")

    if skipfooter < 0:
        raise ValueError("skipfooter cannot be negative")

    return skipfooter
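
# For example:
#
#   >>> _validate_skipfooter_arg(2)
#   2
#   >>> _validate_skipfooter_arg(-1)
#   Traceback (most recent call last):
#       ...
#   ValueError: skipfooter cannot be negative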


def _validate_usecols_arg(usecols):
    """
    Validate the 'usecols' parameter.

    Checks whether or not the 'usecols' parameter contains all integers
    (column selection by index), strings (column by name) or is a callable.
    Raises a ValueError if that is not the case.

    Parameters
    ----------
    usecols : list-like, callable, or None
        List of columns to use when parsing or a callable that can be used
        to filter a list of table columns.

    Returns
    -------
    usecols_tuple : tuple
        A tuple of (verified_usecols, usecols_dtype).

        'verified_usecols' is either a set if an array-like is passed in or
        'usecols' if a callable or None is passed in.

        'usecols_dtype' is the inferred dtype of 'usecols' if an array-like
        is passed in or None if a callable or None is passed in.
    """
    msg = (
        "'usecols' must either be list-like of all strings, all unicode, "
        "all integers or a callable."
    )
    if usecols is not None:
        if callable(usecols):
            return usecols, None

        if not is_list_like(usecols):
            # see gh-20529
            #
            # Ensure it is iterable container but not string.
            raise ValueError(msg)

        usecols_dtype = lib.infer_dtype(usecols, skipna=False)

        if usecols_dtype not in ("empty", "integer", "string"):
            raise ValueError(msg)

        usecols = set(usecols)

        return usecols, usecols_dtype
    return usecols, None
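
# For example, a list-like is normalized to a set plus its inferred dtype,
# while a callable passes through untouched:
#
#   >>> cols, dtype = _validate_usecols_arg(["a", "b"])
#   >>> sorted(cols), dtype
#   (['a', 'b'], 'string')
#   >>> func = lambda c: c != "skip"
#   >>> _validate_usecols_arg(func) == (func, None)
#   True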


def _validate_parse_dates_arg(parse_dates):
    """
    Check whether or not the 'parse_dates' parameter
    is a non-boolean scalar. Raises a TypeError if
    that is the case.
    """
    msg = (
        "Only booleans, lists, and dictionaries are accepted "
        "for the 'parse_dates' parameter"
    )

    if parse_dates is not None:
        if is_scalar(parse_dates):
            if not lib.is_bool(parse_dates):
                raise TypeError(msg)

        elif not isinstance(parse_dates, (list, dict)):
            raise TypeError(msg)

    return parse_dates
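
# For example, list and dict specifications pass through, while a non-boolean
# scalar is rejected:
#
#   >>> _validate_parse_dates_arg([0, 1])
#   [0, 1]
#   >>> _validate_parse_dates_arg("a")
#   Traceback (most recent call last):
#       ...
#   TypeError: Only booleans, lists, and dictionaries are accepted for the 'parse_dates' parameter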


class ParserBase:
    def __init__(self, kwds):
        self.names = kwds.get("names")
        self.orig_names = None
        self.prefix = kwds.pop("prefix", None)

        self.index_col = kwds.get("index_col", None)
        self.unnamed_cols = set()
        self.index_names = None
        self.col_names = None

        self.parse_dates = _validate_parse_dates_arg(kwds.pop("parse_dates", False))
        self.date_parser = kwds.pop("date_parser", None)
        self.dayfirst = kwds.pop("dayfirst", False)
        self.keep_date_col = kwds.pop("keep_date_col", False)

        self.na_values = kwds.get("na_values")
        self.na_fvalues = kwds.get("na_fvalues")
        self.na_filter = kwds.get("na_filter", False)
        self.keep_default_na = kwds.get("keep_default_na", True)

        self.true_values = kwds.get("true_values")
        self.false_values = kwds.get("false_values")
        self.mangle_dupe_cols = kwds.get("mangle_dupe_cols", True)
        self.infer_datetime_format = kwds.pop("infer_datetime_format", False)
        self.cache_dates = kwds.pop("cache_dates", True)

        self._date_conv = _make_date_converter(
            date_parser=self.date_parser,
            dayfirst=self.dayfirst,
            infer_datetime_format=self.infer_datetime_format,
            cache_dates=self.cache_dates,
        )

        # validate header options for mi
        self.header = kwds.get("header")
        if isinstance(self.header, (list, tuple, np.ndarray)):
            if not all(map(is_integer, self.header)):
                raise ValueError("header must be integer or list of integers")
            if any(i < 0 for i in self.header):
                raise ValueError(
                    "cannot specify multi-index header with negative integers"
                )
            if kwds.get("usecols"):
                raise ValueError(
                    "cannot specify usecols when specifying a multi-index header"
                )
            if kwds.get("names"):
                raise ValueError(
                    "cannot specify names when specifying a multi-index header"
                )

            # validate index_col that only

[File truncated; the full source is available at the repository URL above.]