PageRenderTime 61ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 1ms

/pandas/core/frame.py

http://github.com/pydata/pandas
Python | 10964 lines | 10896 code | 26 blank | 42 comment | 80 complexity | c9914df2f7e7376efe60854a7496065d MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. """
  2. DataFrame
  3. ---------
  4. An efficient 2D container for potentially mixed-type time series or other
  5. labeled data series.
  6. Similar to its R counterpart, data.frame, except providing automatic data
  7. alignment and a host of useful data manipulation methods having to do with the
  8. labeling information
  9. """
  10. from __future__ import annotations
  11. import collections
  12. from collections import abc
  13. import datetime
  14. import functools
  15. from io import StringIO
  16. import itertools
  17. import mmap
  18. from textwrap import dedent
  19. from typing import (
  20. IO,
  21. TYPE_CHECKING,
  22. Any,
  23. Callable,
  24. Hashable,
  25. Iterable,
  26. Iterator,
  27. Literal,
  28. Sequence,
  29. cast,
  30. overload,
  31. )
  32. import warnings
  33. import numpy as np
  34. import numpy.ma as ma
  35. from pandas._config import get_option
  36. from pandas._libs import (
  37. algos as libalgos,
  38. lib,
  39. properties,
  40. )
  41. from pandas._libs.hashtable import duplicated
  42. from pandas._libs.lib import no_default
  43. from pandas._typing import (
  44. AggFuncType,
  45. AnyArrayLike,
  46. ArrayLike,
  47. Axes,
  48. Axis,
  49. ColspaceArgType,
  50. CompressionOptions,
  51. Dtype,
  52. DtypeObj,
  53. FilePathOrBuffer,
  54. FillnaOptions,
  55. FloatFormatType,
  56. FormattersType,
  57. Frequency,
  58. IndexKeyFunc,
  59. IndexLabel,
  60. Level,
  61. PythonFuncType,
  62. Renamer,
  63. Scalar,
  64. StorageOptions,
  65. Suffixes,
  66. TimedeltaConvertibleTypes,
  67. TimestampConvertibleTypes,
  68. ValueKeyFunc,
  69. npt,
  70. )
  71. from pandas.compat._optional import import_optional_dependency
  72. from pandas.compat.numpy import function as nv
  73. from pandas.util._decorators import (
  74. Appender,
  75. Substitution,
  76. deprecate_kwarg,
  77. deprecate_nonkeyword_arguments,
  78. doc,
  79. rewrite_axis_style_signature,
  80. )
  81. from pandas.util._validators import (
  82. validate_ascending,
  83. validate_axis_style_args,
  84. validate_bool_kwarg,
  85. validate_percentile,
  86. )
  87. from pandas.core.dtypes.cast import (
  88. construct_1d_arraylike_from_scalar,
  89. construct_2d_arraylike_from_scalar,
  90. find_common_type,
  91. infer_dtype_from_scalar,
  92. invalidate_string_dtypes,
  93. maybe_box_native,
  94. maybe_downcast_to_dtype,
  95. validate_numeric_casting,
  96. )
  97. from pandas.core.dtypes.common import (
  98. ensure_platform_int,
  99. infer_dtype_from_object,
  100. is_1d_only_ea_dtype,
  101. is_1d_only_ea_obj,
  102. is_bool_dtype,
  103. is_dataclass,
  104. is_datetime64_any_dtype,
  105. is_dict_like,
  106. is_dtype_equal,
  107. is_extension_array_dtype,
  108. is_float,
  109. is_float_dtype,
  110. is_hashable,
  111. is_integer,
  112. is_integer_dtype,
  113. is_iterator,
  114. is_list_like,
  115. is_object_dtype,
  116. is_scalar,
  117. is_sequence,
  118. pandas_dtype,
  119. )
  120. from pandas.core.dtypes.dtypes import ExtensionDtype
  121. from pandas.core.dtypes.missing import (
  122. isna,
  123. notna,
  124. )
  125. from pandas.core import (
  126. algorithms,
  127. common as com,
  128. generic,
  129. nanops,
  130. ops,
  131. )
  132. from pandas.core.accessor import CachedAccessor
  133. from pandas.core.apply import (
  134. reconstruct_func,
  135. relabel_result,
  136. )
  137. from pandas.core.array_algos.take import take_2d_multi
  138. from pandas.core.arraylike import OpsMixin
  139. from pandas.core.arrays import (
  140. DatetimeArray,
  141. ExtensionArray,
  142. TimedeltaArray,
  143. )
  144. from pandas.core.arrays.sparse import SparseFrameAccessor
  145. from pandas.core.construction import (
  146. extract_array,
  147. sanitize_array,
  148. sanitize_masked_array,
  149. )
  150. from pandas.core.generic import (
  151. NDFrame,
  152. _shared_docs,
  153. )
  154. from pandas.core.indexers import check_key_length
  155. from pandas.core.indexes.api import (
  156. DatetimeIndex,
  157. Index,
  158. PeriodIndex,
  159. default_index,
  160. ensure_index,
  161. ensure_index_from_sequences,
  162. )
  163. from pandas.core.indexes.multi import (
  164. MultiIndex,
  165. maybe_droplevels,
  166. )
  167. from pandas.core.indexing import (
  168. check_bool_indexer,
  169. convert_to_index_sliceable,
  170. )
  171. from pandas.core.internals import (
  172. ArrayManager,
  173. BlockManager,
  174. )
  175. from pandas.core.internals.construction import (
  176. arrays_to_mgr,
  177. dataclasses_to_dicts,
  178. dict_to_mgr,
  179. mgr_to_mgr,
  180. ndarray_to_mgr,
  181. nested_data_to_arrays,
  182. rec_array_to_mgr,
  183. reorder_arrays,
  184. to_arrays,
  185. treat_as_nested,
  186. )
  187. from pandas.core.reshape.melt import melt
  188. from pandas.core.series import Series
  189. from pandas.core.sorting import (
  190. get_group_index,
  191. lexsort_indexer,
  192. nargsort,
  193. )
  194. from pandas.io.common import get_handle
  195. from pandas.io.formats import (
  196. console,
  197. format as fmt,
  198. )
  199. from pandas.io.formats.info import (
  200. BaseInfo,
  201. DataFrameInfo,
  202. )
  203. import pandas.plotting
  204. if TYPE_CHECKING:
  205. from pandas.core.groupby.generic import DataFrameGroupBy
  206. from pandas.core.resample import Resampler
  207. from pandas.io.formats.style import Styler
  208. # ---------------------------------------------------------------------
  209. # Docstring templates
  # ``_shared_doc_kwargs`` maps placeholder names ("axes", "klass", "axis",
  # "inplace", ...) to DataFrame-specific text fragments.
  # NOTE(review): presumably these are substituted into shared docstrings
  # through the imported ``doc`` / ``Substitution`` / ``Appender`` helpers; the
  # use sites are outside this view -- confirm before renaming any key.
  210. _shared_doc_kwargs = {
  211. "axes": "index, columns",
  212. "klass": "DataFrame",
  213. "axes_single_arg": "{0 or 'index', 1 or 'columns'}",
  214. "axis": """axis : {0 or 'index', 1 or 'columns'}, default 0
  215. If 0 or 'index': apply function to each column.
  216. If 1 or 'columns': apply function to each row.""",
  217. "inplace": """
  218. inplace : bool, default False
  219. If True, performs operation inplace and returns None.""",
  220. "optional_by": """
  221. by : str or list of str
  222. Name or list of names to sort by.
  223. - if `axis` is 0 or `'index'` then `by` may contain index
  224. levels and/or column labels.
  225. - if `axis` is 1 or `'columns'` then `by` may contain column
  226. levels and/or index labels.""",
  227. "optional_labels": """labels : array-like, optional
  228. New labels / index to conform the axis specified by 'axis' to.""",
  229. "optional_axis": """axis : int or str, optional
  230. Axis to target. Can be either the axis name ('index', 'columns')
  231. or number (0, 1).""",
  232. "replace_iloc": """
  233. This differs from updating with ``.loc`` or ``.iloc``, which require
  234. you to specify a location to update with some value.""",
  235. }
  # Parameter description for a ``numeric_only`` argument, kept as a standalone
  # fragment (the text ends with a newline so it can be spliced into a
  # Parameters section).
  236. _numeric_only_doc = """numeric_only : bool or None, default None
  237. Include only float, int, boolean data. If None, will attempt to use
  238. everything, then use only numeric data
  239. """
  # Docstring template describing a database-style merge.
  # The ``%s`` that directly follows the "Parameters" underline is a %-format
  # placeholder, so this string must be %-formatted before it is used as a
  # docstring.
  # NOTE(review): presumably the placeholder receives the extra leading
  # parameter description when the text is attached to a function rather than
  # a method -- the use sites are outside this view, confirm there.
  240. _merge_doc = """
  241. Merge DataFrame or named Series objects with a database-style join.
  242. A named Series object is treated as a DataFrame with a single named column.
  243. The join is done on columns or indexes. If joining columns on
  244. columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
  245. on indexes or indexes on a column or columns, the index will be passed on.
  246. When performing a cross merge, no column specifications to merge on are
  247. allowed.
  248. .. warning::
  249. If both key columns contain rows where the key is a null value, those
  250. rows will be matched against each other. This is different from usual SQL
  251. join behaviour and can lead to unexpected results.
  252. Parameters
  253. ----------%s
  254. right : DataFrame or named Series
  255. Object to merge with.
  256. how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
  257. Type of merge to be performed.
  258. * left: use only keys from left frame, similar to a SQL left outer join;
  259. preserve key order.
  260. * right: use only keys from right frame, similar to a SQL right outer join;
  261. preserve key order.
  262. * outer: use union of keys from both frames, similar to a SQL full outer
  263. join; sort keys lexicographically.
  264. * inner: use intersection of keys from both frames, similar to a SQL inner
  265. join; preserve the order of the left keys.
  266. * cross: creates the cartesian product from both frames, preserves the order
  267. of the left keys.
  268. .. versionadded:: 1.2.0
  269. on : label or list
  270. Column or index level names to join on. These must be found in both
  271. DataFrames. If `on` is None and not merging on indexes then this defaults
  272. to the intersection of the columns in both DataFrames.
  273. left_on : label or list, or array-like
  274. Column or index level names to join on in the left DataFrame. Can also
  275. be an array or list of arrays of the length of the left DataFrame.
  276. These arrays are treated as if they are columns.
  277. right_on : label or list, or array-like
  278. Column or index level names to join on in the right DataFrame. Can also
  279. be an array or list of arrays of the length of the right DataFrame.
  280. These arrays are treated as if they are columns.
  281. left_index : bool, default False
  282. Use the index from the left DataFrame as the join key(s). If it is a
  283. MultiIndex, the number of keys in the other DataFrame (either the index
  284. or a number of columns) must match the number of levels.
  285. right_index : bool, default False
  286. Use the index from the right DataFrame as the join key. Same caveats as
  287. left_index.
  288. sort : bool, default False
  289. Sort the join keys lexicographically in the result DataFrame. If False,
  290. the order of the join keys depends on the join type (how keyword).
  291. suffixes : list-like, default is ("_x", "_y")
  292. A length-2 sequence where each element is optionally a string
  293. indicating the suffix to add to overlapping column names in
  294. `left` and `right` respectively. Pass a value of `None` instead
  295. of a string to indicate that the column name from `left` or
  296. `right` should be left as-is, with no suffix. At least one of the
  297. values must not be None.
  298. copy : bool, default True
  299. If False, avoid copy if possible.
  300. indicator : bool or str, default False
  301. If True, adds a column to the output DataFrame called "_merge" with
  302. information on the source of each row. The column can be given a different
  303. name by providing a string argument. The column will have a Categorical
  304. type with the value of "left_only" for observations whose merge key only
  305. appears in the left DataFrame, "right_only" for observations
  306. whose merge key only appears in the right DataFrame, and "both"
  307. if the observation's merge key is found in both DataFrames.
  308. validate : str, optional
  309. If specified, checks if merge is of specified type.
  310. * "one_to_one" or "1:1": check if merge keys are unique in both
  311. left and right datasets.
  312. * "one_to_many" or "1:m": check if merge keys are unique in left
  313. dataset.
  314. * "many_to_one" or "m:1": check if merge keys are unique in right
  315. dataset.
  316. * "many_to_many" or "m:m": allowed, but does not result in checks.
  317. Returns
  318. -------
  319. DataFrame
  320. A DataFrame of the two merged objects.
  321. See Also
  322. --------
  323. merge_ordered : Merge with optional filling/interpolation.
  324. merge_asof : Merge on nearest keys.
  325. DataFrame.join : Similar method using indices.
  326. Notes
  327. -----
  328. Support for specifying index levels as the `on`, `left_on`, and
  329. `right_on` parameters was added in version 0.23.0
  330. Support for merging named Series objects was added in version 0.24.0
  331. Examples
  332. --------
  333. >>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
  334. ... 'value': [1, 2, 3, 5]})
  335. >>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
  336. ... 'value': [5, 6, 7, 8]})
  337. >>> df1
  338. lkey value
  339. 0 foo 1
  340. 1 bar 2
  341. 2 baz 3
  342. 3 foo 5
  343. >>> df2
  344. rkey value
  345. 0 foo 5
  346. 1 bar 6
  347. 2 baz 7
  348. 3 foo 8
  349. Merge df1 and df2 on the lkey and rkey columns. The value columns have
  350. the default suffixes, _x and _y, appended.
  351. >>> df1.merge(df2, left_on='lkey', right_on='rkey')
  352. lkey value_x rkey value_y
  353. 0 foo 1 foo 5
  354. 1 foo 1 foo 8
  355. 2 foo 5 foo 5
  356. 3 foo 5 foo 8
  357. 4 bar 2 bar 6
  358. 5 baz 3 baz 7
  359. Merge DataFrames df1 and df2 with specified left and right suffixes
  360. appended to any overlapping columns.
  361. >>> df1.merge(df2, left_on='lkey', right_on='rkey',
  362. ... suffixes=('_left', '_right'))
  363. lkey value_left rkey value_right
  364. 0 foo 1 foo 5
  365. 1 foo 1 foo 8
  366. 2 foo 5 foo 5
  367. 3 foo 5 foo 8
  368. 4 bar 2 bar 6
  369. 5 baz 3 baz 7
  370. Merge DataFrames df1 and df2, but raise an exception if the DataFrames have
  371. any overlapping columns.
  372. >>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False))
  373. Traceback (most recent call last):
  374. ...
  375. ValueError: columns overlap but no suffix specified:
  376. Index(['value'], dtype='object')
  377. >>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
  378. >>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
  379. >>> df1
  380. a b
  381. 0 foo 1
  382. 1 bar 2
  383. >>> df2
  384. a c
  385. 0 foo 3
  386. 1 baz 4
  387. >>> df1.merge(df2, how='inner', on='a')
  388. a b c
  389. 0 foo 1 3
  390. >>> df1.merge(df2, how='left', on='a')
  391. a b c
  392. 0 foo 1 3.0
  393. 1 bar 2 NaN
  394. >>> df1 = pd.DataFrame({'left': ['foo', 'bar']})
  395. >>> df2 = pd.DataFrame({'right': [7, 8]})
  396. >>> df1
  397. left
  398. 0 foo
  399. 1 bar
  400. >>> df2
  401. right
  402. 0 7
  403. 1 8
  404. >>> df1.merge(df2, how='cross')
  405. left right
  406. 0 foo 7
  407. 1 foo 8
  408. 2 bar 7
  409. 3 bar 8
  410. """
  411. # -----------------------------------------------------------------------
  412. # DataFrame class
  413. class DataFrame(NDFrame, OpsMixin):
  414. """
  415. Two-dimensional, size-mutable, potentially heterogeneous tabular data.
  416. Data structure also contains labeled axes (rows and columns).
  417. Arithmetic operations align on both row and column labels. Can be
  418. thought of as a dict-like container for Series objects. The primary
  419. pandas data structure.
  420. Parameters
  421. ----------
  422. data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
  423. Dict can contain Series, arrays, constants, dataclass or list-like objects. If
  424. data is a dict, column order follows insertion-order. If a dict contains Series
  425. which have an index defined, it is aligned by its index.
  426. .. versionchanged:: 0.25.0
  427. If data is a list of dicts, column order follows insertion-order.
  428. index : Index or array-like
  429. Index to use for resulting frame. Will default to RangeIndex if
  430. no indexing information part of input data and no index provided.
  431. columns : Index or array-like
  432. Column labels to use for resulting frame when data does not have them,
  433. defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
  434. will perform column selection instead.
  435. dtype : dtype, default None
  436. Data type to force. Only a single dtype is allowed. If None, infer.
  437. copy : bool or None, default None
  438. Copy data from inputs.
  439. For dict data, the default of None behaves like ``copy=True``. For DataFrame
  440. or 2d ndarray input, the default of None behaves like ``copy=False``.
  441. .. versionchanged:: 1.3.0
  442. See Also
  443. --------
  444. DataFrame.from_records : Constructor from tuples, also record arrays.
  445. DataFrame.from_dict : From dicts of Series, arrays, or dicts.
  446. read_csv : Read a comma-separated values (csv) file into DataFrame.
  447. read_table : Read general delimited file into DataFrame.
  448. read_clipboard : Read text from clipboard into DataFrame.
  449. Examples
  450. --------
  451. Constructing DataFrame from a dictionary.
  452. >>> d = {'col1': [1, 2], 'col2': [3, 4]}
  453. >>> df = pd.DataFrame(data=d)
  454. >>> df
  455. col1 col2
  456. 0 1 3
  457. 1 2 4
  458. Notice that the inferred dtype is int64.
  459. >>> df.dtypes
  460. col1 int64
  461. col2 int64
  462. dtype: object
  463. To enforce a single dtype:
  464. >>> df = pd.DataFrame(data=d, dtype=np.int8)
  465. >>> df.dtypes
  466. col1 int8
  467. col2 int8
  468. dtype: object
  469. Constructing DataFrame from a dictionary including Series:
  470. >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
  471. >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
  472. col1 col2
  473. 0 0 NaN
  474. 1 1 NaN
  475. 2 2 2.0
  476. 3 3 3.0
  477. Constructing DataFrame from numpy ndarray:
  478. >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
  479. ... columns=['a', 'b', 'c'])
  480. >>> df2
  481. a b c
  482. 0 1 2 3
  483. 1 4 5 6
  484. 2 7 8 9
  485. Constructing DataFrame from a numpy ndarray that has labeled columns:
  486. >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
  487. ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")])
  488. >>> df3 = pd.DataFrame(data, columns=['c', 'a'])
  489. ...
  490. >>> df3
  491. c a
  492. 0 3 1
  493. 1 6 4
  494. 2 9 7
  495. Constructing DataFrame from dataclass:
  496. >>> from dataclasses import make_dataclass
  497. >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
  498. >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
  499. x y
  500. 0 0 0
  501. 1 0 3
  502. 2 2 3
  503. """
  504. _internal_names_set = {"columns", "index"} | NDFrame._internal_names_set
  505. _typ = "dataframe"
  506. _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray)
  507. _accessors: set[str] = {"sparse"}
  508. _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([])
  509. _mgr: BlockManager | ArrayManager
  510. @property
  511. def _constructor(self) -> type[DataFrame]:
  512. return DataFrame
  513. _constructor_sliced: type[Series] = Series
  514. # ----------------------------------------------------------------------
  515. # Constructors
  def __init__(
      self,
      data=None,
      index: Axes | None = None,
      columns: Axes | None = None,
      dtype: Dtype | None = None,
      copy: bool | None = None,
  ):
      """
      Build the internal manager for ``data`` and initialise the frame.

      Parameters
      ----------
      data : optional
          Input to wrap. ``None`` is treated as an empty dict. A DataFrame
          is unwrapped to its manager; managers, dicts, masked arrays,
          ndarray/Series/Index, other list-likes and finally scalars are
          each handled by a dedicated branch below.
      index, columns : Axes, optional
          Labels forwarded to the manager constructors. Both are required
          when ``data`` falls through to the scalar branch.
      dtype : Dtype, optional
          Passed through ``self._validate_dtype`` when given.
      copy : bool, optional
          ``None`` resolves to ``True`` for dict (or missing) data and to
          ``False`` for everything else.

      Raises
      ------
      ValueError
          If ``data`` reaches the scalar branch and ``index`` or ``columns``
          is None.
      """
      if copy is None:
          if isinstance(data, dict) or data is None:
              # retain pre-GH#38939 default behavior
              copy = True
          else:
              copy = False

      if data is None:
          data = {}
      if dtype is not None:
          dtype = self._validate_dtype(dtype)

      if isinstance(data, DataFrame):
          data = data._mgr

      if isinstance(data, (BlockManager, ArrayManager)):
          # first check if a Manager is passed without any other arguments
          # -> use fastpath (without checking Manager type)
          if index is None and columns is None and dtype is None and not copy:
              # GH#33357 fastpath
              NDFrame.__init__(self, data)
              return

      # Looked up only after the fastpath so that path skips the option read.
      manager = get_option("mode.data_manager")

      if isinstance(data, (BlockManager, ArrayManager)):
          mgr = self._init_mgr(
              data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy
          )

      elif isinstance(data, dict):
          # GH#38939 de facto copy defaults to False only in non-dict cases
          mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager)

      elif isinstance(data, ma.MaskedArray):
          import numpy.ma.mrecords as mrecords

          # masked recarray
          if isinstance(data, mrecords.MaskedRecords):
              mgr = rec_array_to_mgr(
                  data,
                  index,
                  columns,
                  dtype,
                  copy,
                  typ=manager,
              )
              warnings.warn(
                  "Support for MaskedRecords is deprecated and will be "
                  "removed in a future version.  Pass "
                  "{name: data[name] for name in data.dtype.names} instead.",
                  FutureWarning,
                  stacklevel=2,
              )

          # a masked array
          else:
              data = sanitize_masked_array(data)
              mgr = ndarray_to_mgr(
                  data,
                  index,
                  columns,
                  dtype=dtype,
                  copy=copy,
                  typ=manager,
              )

      elif isinstance(data, (np.ndarray, Series, Index)):
          if data.dtype.names:
              # i.e. numpy structured array
              data = cast(np.ndarray, data)
              mgr = rec_array_to_mgr(
                  data,
                  index,
                  columns,
                  dtype,
                  copy,
                  typ=manager,
              )
          elif getattr(data, "name", None) is not None:
              # i.e. Series/Index with non-None name
              mgr = dict_to_mgr(
                  # error: Item "ndarray" of "Union[ndarray, Series, Index]" has no
                  # attribute "name"
                  {data.name: data},  # type: ignore[union-attr]
                  index,
                  columns,
                  dtype=dtype,
                  typ=manager,
              )
          else:
              mgr = ndarray_to_mgr(
                  data,
                  index,
                  columns,
                  dtype=dtype,
                  copy=copy,
                  typ=manager,
              )

      # For data is list-like, or Iterable (will consume into list)
      elif is_list_like(data):
          if not isinstance(data, (abc.Sequence, ExtensionArray)):
              data = list(data)
          if len(data) > 0:
              if is_dataclass(data[0]):
                  data = dataclasses_to_dicts(data)
              if treat_as_nested(data):
                  if columns is not None:
                      # error: Argument 1 to "ensure_index" has incompatible type
                      # "Collection[Any]"; expected "Union[Union[Union[ExtensionArray,
                      # ndarray], Index, Series], Sequence[Any]]"
                      columns = ensure_index(columns)  # type: ignore[arg-type]
                  arrays, columns, index = nested_data_to_arrays(
                      # error: Argument 3 to "nested_data_to_arrays" has incompatible
                      # type "Optional[Collection[Any]]"; expected "Optional[Index]"
                      data,
                      columns,
                      index,  # type: ignore[arg-type]
                      dtype,
                  )
                  mgr = arrays_to_mgr(
                      arrays,
                      columns,
                      index,
                      dtype=dtype,
                      typ=manager,
                  )
              else:
                  mgr = ndarray_to_mgr(
                      data,
                      index,
                      columns,
                      dtype=dtype,
                      copy=copy,
                      typ=manager,
                  )
          else:
              mgr = dict_to_mgr(
                  {},
                  index,
                  columns,
                  dtype=dtype,
                  typ=manager,
              )

      # For data is scalar
      else:
          if index is None or columns is None:
              raise ValueError("DataFrame constructor not properly called!")

          # Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
          # expected "Union[Union[Union[ExtensionArray, ndarray],
          # Index, Series], Sequence[Any]]"
          index = ensure_index(index)  # type: ignore[arg-type]
          # Argument 1 to "ensure_index" has incompatible type "Collection[Any]";
          # expected "Union[Union[Union[ExtensionArray, ndarray],
          # Index, Series], Sequence[Any]]"
          columns = ensure_index(columns)  # type: ignore[arg-type]

          # Explicit None check (matching the checks above) instead of relying
          # on the truthiness of a dtype object.
          if dtype is None:
              dtype, _ = infer_dtype_from_scalar(data, pandas_dtype=True)

          # For data is a scalar extension dtype
          if isinstance(dtype, ExtensionDtype):
              # TODO(EA2D): special case not needed with 2D EAs
              values = [
                  construct_1d_arraylike_from_scalar(data, len(index), dtype)
                  for _ in range(len(columns))
              ]
              mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager)
          else:
              arr2d = construct_2d_arraylike_from_scalar(
                  data,
                  len(index),
                  len(columns),
                  dtype,
                  copy,
              )

              mgr = ndarray_to_mgr(
                  arr2d,
                  index,
                  columns,
                  dtype=arr2d.dtype,
                  copy=False,
                  typ=manager,
              )

      # ensure correct Manager type according to settings
      mgr = mgr_to_mgr(mgr, typ=manager)

      NDFrame.__init__(self, mgr)
  699. # ----------------------------------------------------------------------
  @property
  def axes(self) -> list[Index]:
      """
      Return the row labels and the column labels, in that order.

      The result is a two-element list: ``[self.index, self.columns]``.

      Examples
      --------
      >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
      >>> df.axes
      [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'],
      dtype='object')]
      """
      row_labels = self.index
      column_labels = self.columns
      return [row_labels, column_labels]
  @property
  def shape(self) -> tuple[int, int]:
      """
      Return ``(number of rows, number of columns)`` for the DataFrame.

      See Also
      --------
      ndarray.shape : Tuple of array dimensions.

      Examples
      --------
      >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
      >>> df.shape
      (2, 2)

      >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4],
      ...                    'col3': [5, 6]})
      >>> df.shape
      (2, 3)
      """
      n_rows = len(self.index)
      n_cols = len(self.columns)
      return n_rows, n_cols
  @property
  def _is_homogeneous_type(self) -> bool:
      """
      Report whether every column of the DataFrame shares one dtype.

      Returns
      -------
      bool

      See Also
      --------
      Index._is_homogeneous_type : Whether the object has a single
          dtype.
      MultiIndex._is_homogeneous_type : Whether all the levels of a
          MultiIndex have the same dtype.

      Examples
      --------
      >>> DataFrame({"A": [1, 2], "B": [3, 4]})._is_homogeneous_type
      True
      >>> DataFrame({"A": [1, 2], "B": [3.0, 4.0]})._is_homogeneous_type
      False

      Items with the same type but different sizes are considered
      different types.

      >>> DataFrame({
      ...    "A": np.array([1, 2], dtype=np.int32),
      ...    "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type
      False
      """
      mgr = self._mgr
      if isinstance(mgr, ArrayManager):
          # one array per entry: compare the array dtypes
          distinct_dtypes = {arr.dtype for arr in mgr.arrays}
      elif mgr.any_extension_types:
          # with extension types present, compare the block dtypes
          distinct_dtypes = {blk.dtype for blk in mgr.blocks}
      else:
          return not self._is_mixed_type
      return len(distinct_dtypes) == 1
  @property
  def _can_fast_transpose(self) -> bool:
      """
      Whether this DataFrame can be transposed without creating any new
      array objects.
      """
      mgr = self._mgr
      if isinstance(mgr, ArrayManager):
          return False

      blks = mgr.blocks
      # Exactly one block is required, and its dtype must not be a
      # 1D-only extension dtype.
      # TODO(EA2D) special case would be unnecessary with 2D EAs
      return len(blks) == 1 and not is_1d_only_ea_dtype(blks[0].dtype)
  # error: Return type "Union[ndarray, DatetimeArray, TimedeltaArray]" of
  # "_values" incompatible with return type "ndarray" in supertype "NDFrame"
  @property
  def _values(  # type: ignore[override]
      self,
  ) -> np.ndarray | DatetimeArray | TimedeltaArray:
      """
      Analogue to ._values that may return a 2D ExtensionArray.

      Falls back to ``self.values`` unless the data is held in exactly one
      array/block that can be returned in 2D form directly.
      """
      self._consolidate_inplace()
      manager = self._mgr

      if isinstance(manager, ArrayManager):
          arrays = manager.arrays
          if len(arrays) == 1:
              only = arrays[0]
              if not is_1d_only_ea_obj(only):
                  # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]"
                  # has no attribute "reshape"
                  return only.reshape(-1, 1)  # type: ignore[union-attr]
          return self.values

      blks = manager.blocks
      if len(blks) == 1:
          block_values = blks[0].values
          # a 1-dimensional array here is a non-2D ExtensionArray
          if block_values.ndim != 1:
              # more generally, whatever we allow in NDArrayBackedExtensionBlock
              block_values = cast(
                  "np.ndarray | DatetimeArray | TimedeltaArray", block_values
              )
              return block_values.T
      return self.values
  804. # ----------------------------------------------------------------------
  805. # Rendering Methods
  806. def _repr_fits_vertical_(self) -> bool:
  807. """
  808. Check length against max_rows.
  809. """
  810. max_rows = get_option("display.max_rows")
  811. return len(self) <= max_rows
  812. def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool:
  813. """
  814. Check if full repr fits in horizontal boundaries imposed by the display
  815. options width and max_columns.
  816. In case of non-interactive session, no boundaries apply.
  817. `ignore_width` is here so ipynb+HTML output can behave the way
  818. users expect. display.max_columns remains in effect.
  819. GH3541, GH3573
  820. """
  821. width, height = console.get_console_size()
  822. max_columns = get_option("display.max_columns")
  823. nb_columns = len(self.columns)
  824. # exceed max columns
  825. if (max_columns and nb_columns > max_columns) or (
  826. (not ignore_width) and width and nb_columns > (width // 2)
  827. ):
  828. return False
  829. # used by repr_html under IPython notebook or scripts ignore terminal
  830. # dims
  831. if ignore_width or not console.in_interactive_session():
  832. return True
  833. if get_option("display.width") is not None or console.in_ipython_frontend():
  834. # check at least the column row for excessive width
  835. max_rows = 1
  836. else:
  837. max_rows = get_option("display.max_rows")
  838. # when auto-detecting, so width=None and not in ipython front end
  839. # check whether repr fits horizontal by actually checking
  840. # the width of the rendered repr
  841. buf = StringIO()
  842. # only care about the stuff we'll actually print out
  843. # and to_string on entire frame may be expensive
  844. d = self
  845. if max_rows is not None: # unlimited rows
  846. # min of two, where one may be None
  847. d = d.iloc[: min(max_rows, len(d))]
  848. else:
  849. return True
  850. d.to_string(buf=buf)
  851. value = buf.getvalue()
  852. repr_width = max(len(line) for line in value.split("\n"))
  853. return repr_width < width
  854. def _info_repr(self) -> bool:
  855. """
  856. True if the repr should show the info view.
  857. """
  858. info_repr_option = get_option("display.large_repr") == "info"
  859. return info_repr_option and not (
  860. self._repr_fits_horizontal_() and self._repr_fits_vertical_()
  861. )
  862. def __repr__(self) -> str:
  863. """
  864. Return a string representation for a particular DataFrame.
  865. """
  866. buf = StringIO("")
  867. if self._info_repr():
  868. self.info(buf=buf)
  869. return buf.getvalue()
  870. repr_params = fmt.get_dataframe_repr_params()
  871. self.to_string(buf=buf, **repr_params)
  872. return buf.getvalue()
  873. def _repr_html_(self) -> str | None:
  874. """
  875. Return a html representation for a particular DataFrame.
  876. Mainly for IPython notebook.
  877. """
  878. if self._info_repr():
  879. buf = StringIO("")
  880. self.info(buf=buf)
  881. # need to escape the <class>, should be the first line.
  882. val = buf.getvalue().replace("<", r"&lt;", 1)
  883. val = val.replace(">", r"&gt;", 1)
  884. return "<pre>" + val + "</pre>"
  885. if get_option("display.notebook_repr_html"):
  886. max_rows = get_option("display.max_rows")
  887. min_rows = get_option("display.min_rows")
  888. max_cols = get_option("display.max_columns")
  889. show_dimensions = get_option("display.show_dimensions")
  890. formatter = fmt.DataFrameFormatter(
  891. self,
  892. columns=None,
  893. col_space=None,
  894. na_rep="NaN",
  895. formatters=None,
  896. float_format=None,
  897. sparsify=None,
  898. justify=None,
  899. index_names=True,
  900. header=True,
  901. index=True,
  902. bold_rows=True,
  903. escape=True,
  904. max_rows=max_rows,
  905. min_rows=min_rows,
  906. max_cols=max_cols,
  907. show_dimensions=show_dimensions,
  908. decimal=".",
  909. )
  910. return fmt.DataFrameRenderer(formatter).to_html(notebook=True)
  911. else:
  912. return None
  @Substitution(
      header_type="bool or sequence",
      header="Write out the column names. If a list of strings "
      "is given, it is assumed to be aliases for the "
      "column names",
      col_space_type="int, list or dict of int",
      col_space="The minimum width of each column",
  )
  @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring)
  def to_string(
      self,
      buf: FilePathOrBuffer[str] | None = None,
      columns: Sequence[str] | None = None,
      # annotation matches the documented "int, list or dict of int"
      col_space: ColspaceArgType | None = None,
      header: bool | Sequence[str] = True,
      index: bool = True,
      na_rep: str = "NaN",
      formatters: fmt.FormattersType | None = None,
      float_format: fmt.FloatFormatType | None = None,
      sparsify: bool | None = None,
      index_names: bool = True,
      justify: str | None = None,
      max_rows: int | None = None,
      min_rows: int | None = None,
      max_cols: int | None = None,
      show_dimensions: bool = False,
      decimal: str = ".",
      line_width: int | None = None,
      max_colwidth: int | None = None,
      encoding: str | None = None,
  ) -> str | None:
      """
      Render a DataFrame to a console-friendly tabular output.
      %(shared_params)s
      line_width : int, optional
          Width to wrap a line in characters.
      max_colwidth : int, optional
          Max width to truncate each column in characters. By default, no limit.

          .. versionadded:: 1.0.0
      encoding : str, default "utf-8"
          Set character encoding.

          .. versionadded:: 1.0
      %(returns)s
      See Also
      --------
      to_html : Convert DataFrame to HTML.

      Examples
      --------
      >>> d = {'col1': [1, 2, 3], 'col2': [4, 5, 6]}
      >>> df = pd.DataFrame(d)
      >>> print(df.to_string())
         col1  col2
      0     1     4
      1     2     5
      2     3     6
      """
      from pandas import option_context

      # Both the formatter construction and the rendering run inside the
      # option context, so ``max_colwidth`` is in effect for the whole call.
      with option_context("display.max_colwidth", max_colwidth):
          formatter = fmt.DataFrameFormatter(
              self,
              columns=columns,
              col_space=col_space,
              na_rep=na_rep,
              formatters=formatters,
              float_format=float_format,
              sparsify=sparsify,
              justify=justify,
              index_names=index_names,
              header=header,
              index=index,
              min_rows=min_rows,
              max_rows=max_rows,
              max_cols=max_cols,
              show_dimensions=show_dimensions,
              decimal=decimal,
          )
          return fmt.DataFrameRenderer(formatter).to_string(
              buf=buf,
              encoding=encoding,
              line_width=line_width,
          )
  994. # ----------------------------------------------------------------------
  @property
  def style(self) -> Styler:
      """
      Return a Styler object wrapping this DataFrame.

      The Styler offers methods for building a styled HTML representation
      of the DataFrame.

      See Also
      --------
      io.formats.style.Styler : Helps style a DataFrame or Series according to the
          data with HTML and CSS.
      """
      # imported at call time, exactly when the property is accessed
      from pandas.io.formats import style as style_module

      return style_module.Styler(self)
  1007. _shared_docs[
  1008. "items"
  1009. ] = r"""
  1010. Iterate over (column name, Series) pairs.
  1011. Iterates over the DataFrame columns, returning a tuple with
  1012. the column name and the content as a Series.
  1013. Yields
  1014. ------
  1015. label : object
  1016. The column names for the DataFrame being iterated over.
  1017. content : Series
  1018. The column entries belonging to each label, as a Series.
  1019. See Also
  1020. --------
  1021. DataFrame.iterrows : Iterate over DataFrame rows as
  1022. (index, Series) pairs.
  1023. DataFrame.itertuples : Iterate over DataFrame rows as namedtuples
  1024. of the values.
  1025. Examples
  1026. --------
  1027. >>> df = pd.DataFrame({'species': ['bear', 'bear', 'marsupial'],
  1028. ... 'population': [1864, 22000, 80000]},
  1029. ... index=['panda', 'polar', 'koala'])
  1030. >>> df
  1031. species population
  1032. panda bear 1864
  1033. polar bear 22000
  1034. koala marsupial 80000
  1035. >>> for label, content in df.items():
  1036. ... print(f'label: {label}')
  1037. ... print(f'content: {content}', sep='\n')
  1038. ...
  1039. label: species
  1040. content:
  1041. panda bear
  1042. polar bear
  1043. koala marsupial
  1044. Name: species, dtype: object
  1045. label: population
  1046. content:
  1047. panda 1864
  1048. polar 22000
  1049. koala 80000
  1050. Name: population, dtype: int64
  1051. """
  @Appender(_shared_docs["items"])
  def items(self) -> Iterable[tuple[Hashable, Series]]:
      # The docstring is attached by ``Appender`` from the shared "items" entry.
      labels = self.columns
      if not labels.is_unique or not hasattr(self, "_item_cache"):
          # positional lookup: labels may repeat, or no item cache exists
          for position, label in enumerate(labels):
              yield label, self._ixs(position, axis=1)
          return
      # unique labels and an item cache: go through the cache
      for label in labels:
          yield label, self._get_item_cache(label)
  @Appender(_shared_docs["items"])
  def iteritems(self) -> Iterable[tuple[Hashable, Series]]:
      # Same docstring as ``items`` (attached by ``Appender`` from the shared
      # "items" entry); iteration is delegated wholesale to ``self.items()``.
      yield from self.items()
  def iterrows(self) -> Iterable[tuple[Hashable, Series]]:
      """
      Iterate over DataFrame rows as (index, Series) pairs.

      Yields
      ------
      index : label or tuple of label
          The index of the row. A tuple for a `MultiIndex`.
      data : Series
          The data of the row as a Series, indexed by the column labels and
          named after the row label.

      See Also
      --------
      DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values.
      DataFrame.items : Iterate over (column name, Series) pairs.

      Notes
      -----
      1. Each row is handed back as a Series, so dtypes are **not**
         preserved across a row (they are preserved across columns for
         DataFrames). For example,

         >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float'])
         >>> row = next(df.iterrows())[1]
         >>> row
         int      1.0
         float    1.5
         Name: 0, dtype: float64
         >>> print(row['int'].dtype)
         float64
         >>> print(df['int'].dtype)
         int64

         To keep dtypes while iterating over rows, prefer
         :meth:`itertuples`, which returns namedtuples of the values and
         is generally faster than ``iterrows``.

      2. You should **never modify** something you are iterating over.
         This is not guaranteed to work in all cases. Depending on the
         data types, the iterator returns a copy and not a view, and
         writing to it will have no effect.
      """
      col_labels = self.columns
      make_row = self._constructor_sliced
      # pair every row label with the matching row of ``self.values``
      for label, row_values in zip(self.index, self.values):
          yield label, make_row(row_values, index=col_labels, name=label)
  def itertuples(
      self, index: bool = True, name: str | None = "Pandas"
  ) -> Iterable[tuple[Any, ...]]:
      """
      Iterate over DataFrame rows as namedtuples.

      Parameters
      ----------
      index : bool, default True
          If True, return the index as the first element of the tuple.
      name : str or None, default "Pandas"
          The name of the returned namedtuples or None to return regular
          tuples.

      Returns
      -------
      iterator
          An object to iterate over namedtuples for each row in the
          DataFrame with the first field possibly being the index and
          following fields being the column values.

      See Also
      --------
      DataFrame.iterrows : Iterate over DataFrame rows as (index, Series)
          pairs.
      DataFrame.items : Iterate over (column name, Series) pairs.

      Notes
      -----
      The namedtuple type is created with ``rename=True``, so column names
      that are invalid Python identifiers, repeated, or start with an
      underscore are replaced by positional names.

      Examples
      --------
      >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
      ...                   index=['dog', 'hawk'])
      >>> df
            num_legs  num_wings
      dog          4          0
      hawk         2          2
      >>> for row in df.itertuples():
      ...     print(row)
      ...
      Pandas(Index='dog', num_legs=4, num_wings=0)
      Pandas(Index='hawk', num_legs=2, num_wings=2)

      By setting the `index` parameter to False we can remove the index
      as the first element of the tuple:

      >>> for row in df.itertuples(index=False):
      ...     print(row)
      ...
      Pandas(num_legs=4, num_wings=0)
      Pandas(num_legs=2, num_wings=2)

      With the `name` parameter set we set a custom name for the yielded
      namedtuples:

      >>> for row in df.itertuples(name='Animal'):
      ...     print(row)
      ...
      Animal(Index='dog', num_legs=4, num_wings=0)
      Animal(Index='hawk', num_legs=2, num_wings=2)
      """
      # use integer indexing because of possible duplicate column names
      column_arrays = [self.iloc[:, pos] for pos in range(len(self.columns))]

      if index:
          arrays = [self.index, *column_arrays]
          fields = ["Index", *self.columns]
      else:
          arrays = column_arrays
          fields = list(self.columns)

      if name is None:
          # no namedtuple requested: fall back to regular tuples
          return zip(*arrays)

      # https://github.com/python/mypy/issues/9046
      # error: namedtuple() expects a string literal as the first argument
      row_type = collections.namedtuple(  # type: ignore[misc]
          name, fields, rename=True
      )
      return map(row_type._make, zip(*arrays))
  1177. def __len__(self) -> int:
  1178. """
  1179. Returns length of info axis, but here we use the index.
  1180. """
  1181. return len(self.index)
  @overload
  def dot(self, other: Series) -> Series:
      ...

  @overload
  def dot(self, other: DataFrame | Index | ArrayLike) -> DataFrame:
      ...

  def dot(self, other: AnyArrayLike | DataFrame) -> DataFrame | Series:
      """
      Compute the matrix multiplication between the DataFrame and other.

      The matrix product is taken between this DataFrame and the values of
      another Series, DataFrame or numpy array. The same operation is
      available through the operator form ``self @ other``.

      Parameters
      ----------
      other : Series, DataFrame or array-like
          The other object to compute the matrix product with.

      Returns
      -------
      Series or DataFrame
          A Series when `other` is a Series, or when `other` is array-like
          and the product is not two-dimensional. A DataFrame when `other`
          is a DataFrame, or array-like with a two-dimensional product.

      Raises
      ------
      ValueError
          If `other` is a Series or DataFrame whose index does not hold the
          same labels as the columns of this DataFrame, or if `other` is
          array-like and its first dimension differs from the number of
          columns.

      See Also
      --------
      Series.dot: Similar method for Series.

      Notes
      -----
      The dimensions of DataFrame and other must be compatible in order to
      compute the matrix multiplication. In addition, the column names of
      DataFrame and the index of other must contain the same values, as they
      will be aligned prior to the multiplication.

      The dot method for Series computes the inner product, instead of the
      matrix product here.

      Examples
      --------
      Here we multiply a DataFrame with a Series.

      >>> df = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
      >>> s = pd.Series([1, 1, 2, 1])
      >>> df.dot(s)
      0    -4
      1     5
      dtype: int64

      Here we multiply a DataFrame with another DataFrame.

      >>> other = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]])
      >>> df.dot(other)
          0   1
      0   1   4
      1   2   2

      Note that the dot method gives the same result as @

      >>> df @ other
          0   1
      0   1   4
      1   2   2

      The dot method works also if other is an np.array.

      >>> arr = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]])
      >>> df.dot(arr)
          0   1
      0   1   4
      1   2   2

      Note how shuffling of the objects does not change the result.

      >>> s2 = s.reindex([1, 0, 2, 3])
      >>> df.dot(s2)
      0    -4
      1     5
      dtype: int64
      """
      if not isinstance(other, (Series, DataFrame)):
          # raw array-like: no labels to align, only the shapes must agree
          left = self
          lvals = self.values
          rvals = np.asarray(other)
          if lvals.shape[1] != rvals.shape[0]:
              raise ValueError(
                  f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
              )
      else:
          # labelled operand: the union of labels may not exceed either side
          shared = self.columns.union(other.index)
          if len(shared) > len(self.columns) or len(shared) > len(other.index):
              raise ValueError("matrices are not aligned")

          left = self.reindex(columns=shared, copy=False)
          right = other.reindex(index=shared, copy=False)
          lvals = left.values
          rvals = right._values

      if isinstance(other, DataFrame):
          return self._constructor(
              np.dot(lvals, rvals), index=left.index, columns=other.columns
          )
      if isinstance(other, Series):
          return self._constructor_sliced(np.dot(lvals, rvals), index=left.index)
      if not isinstance(rvals, (np.ndarray, Index)):  # pragma: no cover
          raise TypeError(f"unsupported type: {type(other)}")

      product = np.dot(lvals, rvals)
      # a 2-D product is wrapped as a frame, anything else as a Series
      wrap = self._constructor if product.ndim == 2 else self._constructor_sliced
      return wrap(product, index=left.index)
  1278. @overload
  1279. def __matmul__(self, other: Series) -> Series:
  1280. ...
  1281. @overload
  1282. def __matmul__(
  1283. self, other: AnyArrayLike | DataFrame | Series
  1284. ) -> DataFrame | Series:
  1285. ...
  1286. def __matmul__(
  1287. self, other: AnyArrayLike | DataFrame | Series
  1288. ) -> DataFrame | Series:
  1289. """
  1290. Matrix multiplication using binary `@` operator in Python>=3.5.
  1291. """
  1292. return self.dot(other)
  1293. def __rmatmul__(self, other):
  1294. """
  1295. Matrix multiplication using binary `@` operator in Python>=3.5.
  1296. """
  1297. try:
  1298. return self.T.dot(np.transpose(other)).T
  1299. except ValueError as err:
  1300. if "shape mismatch" not in str(err):
  1301. raise
  1302. # GH#21581 give exception message for original shapes
  1303. msg = f"shapes {np.shape(other)} and {self.shape} not aligned"
  1304. raise ValueError(msg) from err
  1305. # ----------------------------------------------------------------------
  1306. # IO methods (to / from other formats)
  1307. @classmethod
  1308. def from_dict(
  1309. cls,
  1310. data,
  1311. orient: str = "columns",
  1312. dtype: Dtype | None = None,
  1313. columns=None,
  1314. ) -> DataFrame:
  1315. """
  1316. Construct DataFrame from dict of array-like or dicts.
  1317. Creates DataFrame object from dictionary by columns or by index
  1318. allowing dtype specification.
  1319. Parameters
  1320. ----------
  1321. data : dict
  1322. Of the form {field : array-like} or {field : dict}.
  1323. orient : {'columns', 'index', 'tight'}, default 'columns'
  1324. The "orientation" of the data. If the keys of the passed dict
  1325. should be the columns of the resulting DataFrame, pass 'columns'
  1326. (default). Otherwise if the keys should be rows, pass 'index'.
  1327. If 'tight', assume a dict with keys ['index', 'columns', 'data',
  1328. 'index_names', 'column_names'].
  1329. .. versionadded:: 1.4.0
  1330. 'tight' as an allowed value for the ``orient`` argument
  1331. dtype : dtype, default None
  1332. Data type to force, otherwise infer.
  1333. columns : list, default None
  1334. Column labels to use when ``orient='index'``. Raises a ValueError
  1335. if used with ``orient='columns'`` or ``orient='tight'``.
  1336. Returns
  1337. -------
  1338. DataFrame
  1339. See Also
  1340. --------
  1341. DataFrame.from_records : DataFrame from structured ndarray, sequence
  1342. of tuples or dicts, or DataFrame.
  1343. DataFrame : DataFrame object creation using constructor.
  1344. DataFrame.to_dict : Convert the DataFrame to a dictionary.
  1345. Examples
  1346. --------
  1347. By default the keys of the dict become the DataFrame columns:
  1348. >>> data = {'col_1': [3, 2,

Large files are truncated, but you can click here to view the full file