PageRenderTime 42ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/common.py

http://github.com/pydata/pandas
Python | 605 lines | 462 code | 37 blank | 106 comment | 24 complexity | 1e9cd711904c6d0477f4f39e0ebe1bb7 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. """
  2. Misc tools for implementing data structures
  3. Note: pandas.core.common is *not* part of the public API.
  4. """
  5. from __future__ import annotations
  6. import builtins
  7. from collections import (
  8. abc,
  9. defaultdict,
  10. )
  11. import contextlib
  12. from functools import partial
  13. import inspect
  14. from typing import (
  15. TYPE_CHECKING,
  16. Any,
  17. Callable,
  18. Collection,
  19. Iterable,
  20. Iterator,
  21. cast,
  22. overload,
  23. )
  24. import warnings
  25. import numpy as np
  26. from pandas._libs import lib
  27. from pandas._typing import (
  28. AnyArrayLike,
  29. ArrayLike,
  30. NpDtype,
  31. RandomState,
  32. Scalar,
  33. T,
  34. )
  35. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  36. from pandas.core.dtypes.common import (
  37. is_array_like,
  38. is_bool_dtype,
  39. is_extension_array_dtype,
  40. is_integer,
  41. )
  42. from pandas.core.dtypes.generic import (
  43. ABCExtensionArray,
  44. ABCIndex,
  45. ABCSeries,
  46. )
  47. from pandas.core.dtypes.inference import iterable_not_string
  48. from pandas.core.dtypes.missing import isna
  49. if TYPE_CHECKING:
  50. from pandas import Index
  51. class SettingWithCopyError(ValueError):
  52. pass
  53. class SettingWithCopyWarning(Warning):
  54. pass
  55. def flatten(line):
  56. """
  57. Flatten an arbitrarily nested sequence.
  58. Parameters
  59. ----------
  60. line : sequence
  61. The non string sequence to flatten
  62. Notes
  63. -----
  64. This doesn't consider strings sequences.
  65. Returns
  66. -------
  67. flattened : generator
  68. """
  69. for element in line:
  70. if iterable_not_string(element):
  71. yield from flatten(element)
  72. else:
  73. yield element
  74. def consensus_name_attr(objs):
  75. name = objs[0].name
  76. for obj in objs[1:]:
  77. try:
  78. if obj.name != name:
  79. name = None
  80. except ValueError:
  81. name = None
  82. return name
  83. def is_bool_indexer(key: Any) -> bool:
  84. """
  85. Check whether `key` is a valid boolean indexer.
  86. Parameters
  87. ----------
  88. key : Any
  89. Only list-likes may be considered boolean indexers.
  90. All other types are not considered a boolean indexer.
  91. For array-like input, boolean ndarrays or ExtensionArrays
  92. with ``_is_boolean`` set are considered boolean indexers.
  93. Returns
  94. -------
  95. bool
  96. Whether `key` is a valid boolean indexer.
  97. Raises
  98. ------
  99. ValueError
  100. When the array is an object-dtype ndarray or ExtensionArray
  101. and contains missing values.
  102. See Also
  103. --------
  104. check_array_indexer : Check that `key` is a valid array to index,
  105. and convert to an ndarray.
  106. """
  107. if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
  108. is_array_like(key) and is_extension_array_dtype(key.dtype)
  109. ):
  110. if key.dtype == np.object_:
  111. key = np.asarray(key)
  112. if not lib.is_bool_array(key):
  113. na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
  114. if lib.infer_dtype(key) == "boolean" and isna(key).any():
  115. # Don't raise on e.g. ["A", "B", np.nan], see
  116. # test_loc_getitem_list_of_labels_categoricalindex_with_na
  117. raise ValueError(na_msg)
  118. return False
  119. return True
  120. elif is_bool_dtype(key.dtype):
  121. return True
  122. elif isinstance(key, list):
  123. # check if np.array(key).dtype would be bool
  124. if len(key) > 0:
  125. if type(key) is not list:
  126. # GH#42461 cython will raise TypeError if we pass a subclass
  127. key = list(key)
  128. return lib.is_bool_list(key)
  129. return False
  130. def cast_scalar_indexer(val, warn_float: bool = False):
  131. """
  132. To avoid numpy DeprecationWarnings, cast float to integer where valid.
  133. Parameters
  134. ----------
  135. val : scalar
  136. warn_float : bool, default False
  137. If True, issue deprecation warning for a float indexer.
  138. Returns
  139. -------
  140. outval : scalar
  141. """
  142. # assumes lib.is_scalar(val)
  143. if lib.is_float(val) and val.is_integer():
  144. if warn_float:
  145. warnings.warn(
  146. "Indexing with a float is deprecated, and will raise an IndexError "
  147. "in pandas 2.0. You can manually convert to an integer key instead.",
  148. FutureWarning,
  149. stacklevel=3,
  150. )
  151. return int(val)
  152. return val
  153. def not_none(*args):
  154. """
  155. Returns a generator consisting of the arguments that are not None.
  156. """
  157. return (arg for arg in args if arg is not None)
  158. def any_none(*args) -> bool:
  159. """
  160. Returns a boolean indicating if any argument is None.
  161. """
  162. return any(arg is None for arg in args)
  163. def all_none(*args) -> bool:
  164. """
  165. Returns a boolean indicating if all arguments are None.
  166. """
  167. return all(arg is None for arg in args)
  168. def any_not_none(*args) -> bool:
  169. """
  170. Returns a boolean indicating if any argument is not None.
  171. """
  172. return any(arg is not None for arg in args)
  173. def all_not_none(*args) -> bool:
  174. """
  175. Returns a boolean indicating if all arguments are not None.
  176. """
  177. return all(arg is not None for arg in args)
  178. def count_not_none(*args) -> int:
  179. """
  180. Returns the count of arguments that are not None.
  181. """
  182. return sum(x is not None for x in args)
  183. def asarray_tuplesafe(values, dtype: NpDtype | None = None) -> np.ndarray:
  184. if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
  185. values = list(values)
  186. elif isinstance(values, ABCIndex):
  187. # error: Incompatible return value type (got "Union[ExtensionArray, ndarray]",
  188. # expected "ndarray")
  189. return values._values # type: ignore[return-value]
  190. if isinstance(values, list) and dtype in [np.object_, object]:
  191. return construct_1d_object_array_from_listlike(values)
  192. result = np.asarray(values, dtype=dtype)
  193. if issubclass(result.dtype.type, str):
  194. result = np.asarray(values, dtype=object)
  195. if result.ndim == 2:
  196. # Avoid building an array of arrays:
  197. values = [tuple(x) for x in values]
  198. result = construct_1d_object_array_from_listlike(values)
  199. return result
  200. def index_labels_to_array(labels, dtype: NpDtype | None = None) -> np.ndarray:
  201. """
  202. Transform label or iterable of labels to array, for use in Index.
  203. Parameters
  204. ----------
  205. dtype : dtype
  206. If specified, use as dtype of the resulting array, otherwise infer.
  207. Returns
  208. -------
  209. array
  210. """
  211. if isinstance(labels, (str, tuple)):
  212. labels = [labels]
  213. if not isinstance(labels, (list, np.ndarray)):
  214. try:
  215. labels = list(labels)
  216. except TypeError: # non-iterable
  217. labels = [labels]
  218. labels = asarray_tuplesafe(labels, dtype=dtype)
  219. return labels
  220. def maybe_make_list(obj):
  221. if obj is not None and not isinstance(obj, (tuple, list)):
  222. return [obj]
  223. return obj
  224. def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
  225. """
  226. If obj is Iterable but not list-like, consume into list.
  227. """
  228. if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
  229. return list(obj)
  230. obj = cast(Collection, obj)
  231. return obj
  232. def is_null_slice(obj) -> bool:
  233. """
  234. We have a null slice.
  235. """
  236. return (
  237. isinstance(obj, slice)
  238. and obj.start is None
  239. and obj.stop is None
  240. and obj.step is None
  241. )
  242. def is_true_slices(line) -> list[bool]:
  243. """
  244. Find non-trivial slices in "line": return a list of booleans with same length.
  245. """
  246. return [isinstance(k, slice) and not is_null_slice(k) for k in line]
  247. # TODO: used only once in indexing; belongs elsewhere?
  248. def is_full_slice(obj, line: int) -> bool:
  249. """
  250. We have a full length slice.
  251. """
  252. return (
  253. isinstance(obj, slice)
  254. and obj.start == 0
  255. and obj.stop == line
  256. and obj.step is None
  257. )
  258. def get_callable_name(obj):
  259. # typical case has name
  260. if hasattr(obj, "__name__"):
  261. return getattr(obj, "__name__")
  262. # some objects don't; could recurse
  263. if isinstance(obj, partial):
  264. return get_callable_name(obj.func)
  265. # fall back to class name
  266. if callable(obj):
  267. return type(obj).__name__
  268. # everything failed (probably because the argument
  269. # wasn't actually callable); we return None
  270. # instead of the empty string in this case to allow
  271. # distinguishing between no name and a name of ''
  272. return None
  273. def apply_if_callable(maybe_callable, obj, **kwargs):
  274. """
  275. Evaluate possibly callable input using obj and kwargs if it is callable,
  276. otherwise return as it is.
  277. Parameters
  278. ----------
  279. maybe_callable : possibly a callable
  280. obj : NDFrame
  281. **kwargs
  282. """
  283. if callable(maybe_callable):
  284. return maybe_callable(obj, **kwargs)
  285. return maybe_callable
  286. def standardize_mapping(into):
  287. """
  288. Helper function to standardize a supplied mapping.
  289. Parameters
  290. ----------
  291. into : instance or subclass of collections.abc.Mapping
  292. Must be a class, an initialized collections.defaultdict,
  293. or an instance of a collections.abc.Mapping subclass.
  294. Returns
  295. -------
  296. mapping : a collections.abc.Mapping subclass or other constructor
  297. a callable object that can accept an iterator to create
  298. the desired Mapping.
  299. See Also
  300. --------
  301. DataFrame.to_dict
  302. Series.to_dict
  303. """
  304. if not inspect.isclass(into):
  305. if isinstance(into, defaultdict):
  306. return partial(defaultdict, into.default_factory)
  307. into = type(into)
  308. if not issubclass(into, abc.Mapping):
  309. raise TypeError(f"unsupported type: {into}")
  310. elif into == defaultdict:
  311. raise TypeError("to_dict() only accepts initialized defaultdicts")
  312. return into
  313. @overload
  314. def random_state(state: np.random.Generator) -> np.random.Generator:
  315. ...
  316. @overload
  317. def random_state(
  318. state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
  319. ) -> np.random.RandomState:
  320. ...
  321. def random_state(state: RandomState | None = None):
  322. """
  323. Helper function for processing random_state arguments.
  324. Parameters
  325. ----------
  326. state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
  327. If receives an int, array-like, or BitGenerator, passes to
  328. np.random.RandomState() as seed.
  329. If receives an np.random RandomState or Generator, just returns that unchanged.
  330. If receives `None`, returns np.random.
  331. If receives anything else, raises an informative ValueError.
  332. .. versionchanged:: 1.1.0
  333. array-like and BitGenerator object now passed to np.random.RandomState()
  334. as seed
  335. Default None.
  336. Returns
  337. -------
  338. np.random.RandomState or np.random.Generator. If state is None, returns np.random
  339. """
  340. if (
  341. is_integer(state)
  342. or is_array_like(state)
  343. or isinstance(state, np.random.BitGenerator)
  344. ):
  345. # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int,
  346. # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected
  347. # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]],
  348. # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]],
  349. # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]],
  350. # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
  351. # integer[Any]]]]]]],
  352. # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
  353. # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]],
  354. # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool,
  355. # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]],
  356. # BitGenerator]"
  357. return np.random.RandomState(state) # type: ignore[arg-type]
  358. elif isinstance(state, np.random.RandomState):
  359. return state
  360. elif isinstance(state, np.random.Generator):
  361. return state
  362. elif state is None:
  363. return np.random
  364. else:
  365. raise ValueError(
  366. "random_state must be an integer, array-like, a BitGenerator, Generator, "
  367. "a numpy RandomState, or None"
  368. )
  369. def pipe(
  370. obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
  371. ) -> T:
  372. """
  373. Apply a function ``func`` to object ``obj`` either by passing obj as the
  374. first argument to the function or, in the case that the func is a tuple,
  375. interpret the first element of the tuple as a function and pass the obj to
  376. that function as a keyword argument whose key is the value of the second
  377. element of the tuple.
  378. Parameters
  379. ----------
  380. func : callable or tuple of (callable, str)
  381. Function to apply to this object or, alternatively, a
  382. ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
  383. string indicating the keyword of `callable`` that expects the
  384. object.
  385. *args : iterable, optional
  386. Positional arguments passed into ``func``.
  387. **kwargs : dict, optional
  388. A dictionary of keyword arguments passed into ``func``.
  389. Returns
  390. -------
  391. object : the return type of ``func``.
  392. """
  393. if isinstance(func, tuple):
  394. func, target = func
  395. if target in kwargs:
  396. msg = f"{target} is both the pipe target and a keyword argument"
  397. raise ValueError(msg)
  398. kwargs[target] = obj
  399. return func(*args, **kwargs)
  400. else:
  401. return func(obj, *args, **kwargs)
  402. def get_rename_function(mapper):
  403. """
  404. Returns a function that will map names/labels, dependent if mapper
  405. is a dict, Series or just a function.
  406. """
  407. if isinstance(mapper, (abc.Mapping, ABCSeries)):
  408. def f(x):
  409. if x in mapper:
  410. return mapper[x]
  411. else:
  412. return x
  413. else:
  414. f = mapper
  415. return f
  416. def convert_to_list_like(
  417. values: Scalar | Iterable | AnyArrayLike,
  418. ) -> list | AnyArrayLike:
  419. """
  420. Convert list-like or scalar input to list-like. List, numpy and pandas array-like
  421. inputs are returned unmodified whereas others are converted to list.
  422. """
  423. if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)):
  424. return values
  425. elif isinstance(values, abc.Iterable) and not isinstance(values, str):
  426. return list(values)
  427. return [values]
  428. @contextlib.contextmanager
  429. def temp_setattr(obj, attr: str, value) -> Iterator[None]:
  430. """Temporarily set attribute on an object.
  431. Args:
  432. obj: Object whose attribute will be modified.
  433. attr: Attribute to modify.
  434. value: Value to temporarily set attribute to.
  435. Yields:
  436. obj with modified attribute.
  437. """
  438. old_value = getattr(obj, attr)
  439. setattr(obj, attr, value)
  440. yield obj
  441. setattr(obj, attr, old_value)
  442. def require_length_match(data, index: Index):
  443. """
  444. Check the length of data matches the length of the index.
  445. """
  446. if len(data) != len(index):
  447. raise ValueError(
  448. "Length of values "
  449. f"({len(data)}) "
  450. "does not match length of index "
  451. f"({len(index)})"
  452. )
# Python builtins mapped to a NumPy function; looked up by ``is_builtin_func``.
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}

# Callables mapped to a method name given as a string; looked up by
# ``get_cython_func``. Each nan-aware NumPy variant maps to the same name as
# its plain counterpart (e.g. ``np.sum`` and ``np.nansum`` both give "sum").
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}
  481. def get_cython_func(arg: Callable) -> str | None:
  482. """
  483. if we define an internal function for this argument, return it
  484. """
  485. return _cython_table.get(arg)
  486. def is_builtin_func(arg):
  487. """
  488. if we define an builtin function for this argument, return it,
  489. otherwise return the arg
  490. """
  491. return _builtin_table.get(arg, arg)