PageRenderTime 79ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/common.py

http://github.com/wesm/pandas
Python | 484 lines | 297 code | 31 blank | 156 comment | 24 complexity | 1b7b8105fca5f599288251ed8dfc51ea MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. """
  2. Misc tools for implementing data structures
  3. Note: pandas.core.common is *not* part of the public API.
  4. """
  5. import collections
  6. from collections import OrderedDict
  7. from datetime import datetime, timedelta
  8. from functools import partial
  9. import inspect
  10. import numpy as np
  11. from pandas._libs import lib, tslibs
  12. import pandas.compat as compat
  13. from pandas.compat import PY36, iteritems
  14. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  15. from pandas.core.dtypes.common import (
  16. is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer)
  17. from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
  18. from pandas.core.dtypes.inference import _iterable_not_string
  19. from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
  20. class SettingWithCopyError(ValueError):
  21. pass
  22. class SettingWithCopyWarning(Warning):
  23. pass
  24. def flatten(l):
  25. """
  26. Flatten an arbitrarily nested sequence.
  27. Parameters
  28. ----------
  29. l : sequence
  30. The non string sequence to flatten
  31. Notes
  32. -----
  33. This doesn't consider strings sequences.
  34. Returns
  35. -------
  36. flattened : generator
  37. """
  38. for el in l:
  39. if _iterable_not_string(el):
  40. for s in flatten(el):
  41. yield s
  42. else:
  43. yield el
  44. def consensus_name_attr(objs):
  45. name = objs[0].name
  46. for obj in objs[1:]:
  47. try:
  48. if obj.name != name:
  49. name = None
  50. except ValueError:
  51. name = None
  52. return name
  53. def maybe_box(indexer, values, obj, key):
  54. # if we have multiples coming back, box em
  55. if isinstance(values, np.ndarray):
  56. return obj[indexer.get_loc(key)]
  57. # return the value
  58. return values
  59. def maybe_box_datetimelike(value):
  60. # turn a datetime like into a Timestamp/timedelta as needed
  61. if isinstance(value, (np.datetime64, datetime)):
  62. value = tslibs.Timestamp(value)
  63. elif isinstance(value, (np.timedelta64, timedelta)):
  64. value = tslibs.Timedelta(value)
  65. return value
# Module-level alias of ``lib.values_from_object`` so it can be referenced
# by its bare name from this module.
values_from_object = lib.values_from_object
  67. def is_bool_indexer(key):
  68. # type: (Any) -> bool
  69. """
  70. Check whether `key` is a valid boolean indexer.
  71. Parameters
  72. ----------
  73. key : Any
  74. Only list-likes may be considered boolean indexers.
  75. All other types are not considered a boolean indexer.
  76. For array-like input, boolean ndarrays or ExtensionArrays
  77. with ``_is_boolean`` set are considered boolean indexers.
  78. Returns
  79. -------
  80. bool
  81. Raises
  82. ------
  83. ValueError
  84. When the array is an object-dtype ndarray or ExtensionArray
  85. and contains missing values.
  86. """
  87. na_msg = 'cannot index with vector containing NA / NaN values'
  88. if (isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or
  89. (is_array_like(key) and is_extension_array_dtype(key.dtype))):
  90. if key.dtype == np.object_:
  91. key = np.asarray(values_from_object(key))
  92. if not lib.is_bool_array(key):
  93. if isna(key).any():
  94. raise ValueError(na_msg)
  95. return False
  96. return True
  97. elif is_bool_dtype(key.dtype):
  98. # an ndarray with bool-dtype by definition has no missing values.
  99. # So we only need to check for NAs in ExtensionArrays
  100. if is_extension_array_dtype(key.dtype):
  101. if np.any(key.isna()):
  102. raise ValueError(na_msg)
  103. return True
  104. elif isinstance(key, list):
  105. try:
  106. arr = np.asarray(key)
  107. return arr.dtype == np.bool_ and len(arr) == len(key)
  108. except TypeError: # pragma: no cover
  109. return False
  110. return False
  111. def cast_scalar_indexer(val):
  112. """
  113. To avoid numpy DeprecationWarnings, cast float to integer where valid.
  114. Parameters
  115. ----------
  116. val : scalar
  117. Returns
  118. -------
  119. outval : scalar
  120. """
  121. # assumes lib.is_scalar(val)
  122. if lib.is_float(val) and val == int(val):
  123. return int(val)
  124. return val
  125. def _not_none(*args):
  126. """
  127. Returns a generator consisting of the arguments that are not None.
  128. """
  129. return (arg for arg in args if arg is not None)
  130. def _any_none(*args):
  131. """
  132. Returns a boolean indicating if any argument is None.
  133. """
  134. for arg in args:
  135. if arg is None:
  136. return True
  137. return False
  138. def _all_none(*args):
  139. """
  140. Returns a boolean indicating if all arguments are None.
  141. """
  142. for arg in args:
  143. if arg is not None:
  144. return False
  145. return True
  146. def _any_not_none(*args):
  147. """
  148. Returns a boolean indicating if any argument is not None.
  149. """
  150. for arg in args:
  151. if arg is not None:
  152. return True
  153. return False
  154. def _all_not_none(*args):
  155. """
  156. Returns a boolean indicating if all arguments are not None.
  157. """
  158. for arg in args:
  159. if arg is None:
  160. return False
  161. return True
  162. def count_not_none(*args):
  163. """
  164. Returns the count of arguments that are not None.
  165. """
  166. return sum(x is not None for x in args)
  167. def try_sort(iterable):
  168. listed = list(iterable)
  169. try:
  170. return sorted(listed)
  171. except Exception:
  172. return listed
  173. def dict_keys_to_ordered_list(mapping):
  174. # when pandas drops support for Python < 3.6, this function
  175. # can be replaced by a simple list(mapping.keys())
  176. if PY36 or isinstance(mapping, OrderedDict):
  177. keys = list(mapping.keys())
  178. else:
  179. keys = try_sort(mapping)
  180. return keys
  181. def asarray_tuplesafe(values, dtype=None):
  182. if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')):
  183. values = list(values)
  184. elif isinstance(values, ABCIndexClass):
  185. return values.values
  186. if isinstance(values, list) and dtype in [np.object_, object]:
  187. return construct_1d_object_array_from_listlike(values)
  188. result = np.asarray(values, dtype=dtype)
  189. if issubclass(result.dtype.type, compat.string_types):
  190. result = np.asarray(values, dtype=object)
  191. if result.ndim == 2:
  192. # Avoid building an array of arrays:
  193. # TODO: verify whether any path hits this except #18819 (invalid)
  194. values = [tuple(x) for x in values]
  195. result = construct_1d_object_array_from_listlike(values)
  196. return result
  197. def index_labels_to_array(labels, dtype=None):
  198. """
  199. Transform label or iterable of labels to array, for use in Index.
  200. Parameters
  201. ----------
  202. dtype : dtype
  203. If specified, use as dtype of the resulting array, otherwise infer.
  204. Returns
  205. -------
  206. array
  207. """
  208. if isinstance(labels, (compat.string_types, tuple)):
  209. labels = [labels]
  210. if not isinstance(labels, (list, np.ndarray)):
  211. try:
  212. labels = list(labels)
  213. except TypeError: # non-iterable
  214. labels = [labels]
  215. labels = asarray_tuplesafe(labels, dtype=dtype)
  216. return labels
  217. def maybe_make_list(obj):
  218. if obj is not None and not isinstance(obj, (tuple, list)):
  219. return [obj]
  220. return obj
  221. def is_null_slice(obj):
  222. """
  223. We have a null slice.
  224. """
  225. return (isinstance(obj, slice) and obj.start is None and
  226. obj.stop is None and obj.step is None)
  227. def is_true_slices(l):
  228. """
  229. Find non-trivial slices in "l": return a list of booleans with same length.
  230. """
  231. return [isinstance(k, slice) and not is_null_slice(k) for k in l]
  232. # TODO: used only once in indexing; belongs elsewhere?
  233. def is_full_slice(obj, l):
  234. """
  235. We have a full length slice.
  236. """
  237. return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
  238. obj.step is None)
  239. def get_callable_name(obj):
  240. # typical case has name
  241. if hasattr(obj, '__name__'):
  242. return getattr(obj, '__name__')
  243. # some objects don't; could recurse
  244. if isinstance(obj, partial):
  245. return get_callable_name(obj.func)
  246. # fall back to class name
  247. if hasattr(obj, '__call__'):
  248. return obj.__class__.__name__
  249. # everything failed (probably because the argument
  250. # wasn't actually callable); we return None
  251. # instead of the empty string in this case to allow
  252. # distinguishing between no name and a name of ''
  253. return None
  254. def apply_if_callable(maybe_callable, obj, **kwargs):
  255. """
  256. Evaluate possibly callable input using obj and kwargs if it is callable,
  257. otherwise return as it is.
  258. Parameters
  259. ----------
  260. maybe_callable : possibly a callable
  261. obj : NDFrame
  262. **kwargs
  263. """
  264. if callable(maybe_callable):
  265. return maybe_callable(obj, **kwargs)
  266. return maybe_callable
  267. def dict_compat(d):
  268. """
  269. Helper function to convert datetimelike-keyed dicts
  270. to Timestamp-keyed dict.
  271. Parameters
  272. ----------
  273. d: dict like object
  274. Returns
  275. -------
  276. dict
  277. """
  278. return {maybe_box_datetimelike(key): value for key, value in iteritems(d)}
  279. def standardize_mapping(into):
  280. """
  281. Helper function to standardize a supplied mapping.
  282. .. versionadded:: 0.21.0
  283. Parameters
  284. ----------
  285. into : instance or subclass of collections.Mapping
  286. Must be a class, an initialized collections.defaultdict,
  287. or an instance of a collections.Mapping subclass.
  288. Returns
  289. -------
  290. mapping : a collections.Mapping subclass or other constructor
  291. a callable object that can accept an iterator to create
  292. the desired Mapping.
  293. See Also
  294. --------
  295. DataFrame.to_dict
  296. Series.to_dict
  297. """
  298. if not inspect.isclass(into):
  299. if isinstance(into, collections.defaultdict):
  300. return partial(
  301. collections.defaultdict, into.default_factory)
  302. into = type(into)
  303. if not issubclass(into, compat.Mapping):
  304. raise TypeError('unsupported type: {into}'.format(into=into))
  305. elif into == collections.defaultdict:
  306. raise TypeError(
  307. 'to_dict() only accepts initialized defaultdicts')
  308. return into
  309. def random_state(state=None):
  310. """
  311. Helper function for processing random_state arguments.
  312. Parameters
  313. ----------
  314. state : int, np.random.RandomState, None.
  315. If receives an int, passes to np.random.RandomState() as seed.
  316. If receives an np.random.RandomState object, just returns object.
  317. If receives `None`, returns np.random.
  318. If receives anything else, raises an informative ValueError.
  319. Default None.
  320. Returns
  321. -------
  322. np.random.RandomState
  323. """
  324. if is_integer(state):
  325. return np.random.RandomState(state)
  326. elif isinstance(state, np.random.RandomState):
  327. return state
  328. elif state is None:
  329. return np.random
  330. else:
  331. raise ValueError("random_state must be an integer, a numpy "
  332. "RandomState, or None")
  333. def _pipe(obj, func, *args, **kwargs):
  334. """
  335. Apply a function ``func`` to object ``obj`` either by passing obj as the
  336. first argument to the function or, in the case that the func is a tuple,
  337. interpret the first element of the tuple as a function and pass the obj to
  338. that function as a keyword argument whose key is the value of the second
  339. element of the tuple.
  340. Parameters
  341. ----------
  342. func : callable or tuple of (callable, string)
  343. Function to apply to this object or, alternatively, a
  344. ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
  345. string indicating the keyword of `callable`` that expects the
  346. object.
  347. args : iterable, optional
  348. positional arguments passed into ``func``.
  349. kwargs : dict, optional
  350. a dictionary of keyword arguments passed into ``func``.
  351. Returns
  352. -------
  353. object : the return type of ``func``.
  354. """
  355. if isinstance(func, tuple):
  356. func, target = func
  357. if target in kwargs:
  358. msg = '%s is both the pipe target and a keyword argument' % target
  359. raise ValueError(msg)
  360. kwargs[target] = obj
  361. return func(*args, **kwargs)
  362. else:
  363. return func(obj, *args, **kwargs)
  364. def _get_rename_function(mapper):
  365. """
  366. Returns a function that will map names/labels, dependent if mapper
  367. is a dict, Series or just a function.
  368. """
  369. if isinstance(mapper, (compat.Mapping, ABCSeries)):
  370. def f(x):
  371. if x in mapper:
  372. return mapper[x]
  373. else:
  374. return x
  375. else:
  376. f = mapper
  377. return f