PageRenderTime 35ms CodeModel.GetById 10ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/__init__.py

https://github.com/jreback/pandas
Python | 286 lines | 276 code | 6 blank | 4 comment | 6 complexity | 48971b95d489c574cd7a88902b19ba73 MD5 | raw file
  1. # flake8: noqa
  2. __docformat__ = "restructuredtext"
  3. # Let users know if they're missing any of our hard dependencies
  4. hard_dependencies = ("numpy", "pytz", "dateutil")
  5. missing_dependencies = []
  6. for dependency in hard_dependencies:
  7. try:
  8. __import__(dependency)
  9. except ImportError as e:
  10. missing_dependencies.append(f"{dependency}: {e}")
  11. if missing_dependencies:
  12. raise ImportError(
  13. "Unable to import required dependencies:\n" + "\n".join(missing_dependencies)
  14. )
  15. del hard_dependencies, dependency, missing_dependencies
  16. # numpy compat
  17. from pandas.compat.numpy import (
  18. np_version_under1p17 as _np_version_under1p17,
  19. np_version_under1p18 as _np_version_under1p18,
  20. is_numpy_dev as _is_numpy_dev,
  21. )
  22. try:
  23. from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
  24. except ImportError as e: # pragma: no cover
  25. # hack but overkill to use re
  26. module = str(e).replace("cannot import name ", "")
  27. raise ImportError(
  28. f"C extension: {module} not built. If you want to import "
  29. "pandas from the source directory, you may need to run "
  30. "'python setup.py build_ext --force' to build the C extensions first."
  31. ) from e
  32. from pandas._config import (
  33. get_option,
  34. set_option,
  35. reset_option,
  36. describe_option,
  37. option_context,
  38. options,
  39. )
  40. # let init-time option registration happen
  41. import pandas.core.config_init
  42. from pandas.core.api import (
  43. # dtype
  44. Int8Dtype,
  45. Int16Dtype,
  46. Int32Dtype,
  47. Int64Dtype,
  48. UInt8Dtype,
  49. UInt16Dtype,
  50. UInt32Dtype,
  51. UInt64Dtype,
  52. Float32Dtype,
  53. Float64Dtype,
  54. CategoricalDtype,
  55. PeriodDtype,
  56. IntervalDtype,
  57. DatetimeTZDtype,
  58. StringDtype,
  59. BooleanDtype,
  60. # missing
  61. NA,
  62. isna,
  63. isnull,
  64. notna,
  65. notnull,
  66. # indexes
  67. Index,
  68. CategoricalIndex,
  69. Int64Index,
  70. UInt64Index,
  71. RangeIndex,
  72. Float64Index,
  73. MultiIndex,
  74. IntervalIndex,
  75. TimedeltaIndex,
  76. DatetimeIndex,
  77. PeriodIndex,
  78. IndexSlice,
  79. # tseries
  80. NaT,
  81. Period,
  82. period_range,
  83. Timedelta,
  84. timedelta_range,
  85. Timestamp,
  86. date_range,
  87. bdate_range,
  88. Interval,
  89. interval_range,
  90. DateOffset,
  91. # conversion
  92. to_numeric,
  93. to_datetime,
  94. to_timedelta,
  95. # misc
  96. Flags,
  97. Grouper,
  98. factorize,
  99. unique,
  100. value_counts,
  101. NamedAgg,
  102. array,
  103. Categorical,
  104. set_eng_float_format,
  105. Series,
  106. DataFrame,
  107. )
  108. from pandas.core.arrays.sparse import SparseDtype
  109. from pandas.tseries.api import infer_freq
  110. from pandas.tseries import offsets
  111. from pandas.core.computation.api import eval
  112. from pandas.core.reshape.api import (
  113. concat,
  114. lreshape,
  115. melt,
  116. wide_to_long,
  117. merge,
  118. merge_asof,
  119. merge_ordered,
  120. crosstab,
  121. pivot,
  122. pivot_table,
  123. get_dummies,
  124. cut,
  125. qcut,
  126. )
  127. import pandas.api
  128. from pandas.util._print_versions import show_versions
  129. from pandas.io.api import (
  130. # excel
  131. ExcelFile,
  132. ExcelWriter,
  133. read_excel,
  134. # parsers
  135. read_csv,
  136. read_fwf,
  137. read_table,
  138. # pickle
  139. read_pickle,
  140. to_pickle,
  141. # pytables
  142. HDFStore,
  143. read_hdf,
  144. # sql
  145. read_sql,
  146. read_sql_query,
  147. read_sql_table,
  148. # misc
  149. read_clipboard,
  150. read_parquet,
  151. read_orc,
  152. read_feather,
  153. read_gbq,
  154. read_html,
  155. read_json,
  156. read_stata,
  157. read_sas,
  158. read_spss,
  159. )
  160. from pandas.io.json import _json_normalize as json_normalize
  161. from pandas.util._tester import test
  162. import pandas.testing
  163. import pandas.arrays
  164. # use the closest tagged version if possible
  165. from ._version import get_versions
  166. v = get_versions()
  167. __version__ = v.get("closest-tag", v["version"])
  168. __git_version__ = v.get("full-revisionid")
  169. del get_versions, v
  170. # GH 27101
  171. def __getattr__(name):
  172. import warnings
  173. if name == "datetime":
  174. warnings.warn(
  175. "The pandas.datetime class is deprecated "
  176. "and will be removed from pandas in a future version. "
  177. "Import from datetime module instead.",
  178. FutureWarning,
  179. stacklevel=2,
  180. )
  181. from datetime import datetime as dt
  182. return dt
  183. elif name == "np":
  184. warnings.warn(
  185. "The pandas.np module is deprecated "
  186. "and will be removed from pandas in a future version. "
  187. "Import numpy directly instead",
  188. FutureWarning,
  189. stacklevel=2,
  190. )
  191. import numpy as np
  192. return np
  193. elif name in {"SparseSeries", "SparseDataFrame"}:
  194. warnings.warn(
  195. f"The {name} class is removed from pandas. Accessing it from "
  196. "the top-level namespace will also be removed in the next version",
  197. FutureWarning,
  198. stacklevel=2,
  199. )
  200. return type(name, (), {})
  201. elif name == "SparseArray":
  202. warnings.warn(
  203. "The pandas.SparseArray class is deprecated "
  204. "and will be removed from pandas in a future version. "
  205. "Use pandas.arrays.SparseArray instead.",
  206. FutureWarning,
  207. stacklevel=2,
  208. )
  209. from pandas.core.arrays.sparse import SparseArray as _SparseArray
  210. return _SparseArray
  211. raise AttributeError(f"module 'pandas' has no attribute '{name}'")
  212. # module level doc-string
  213. __doc__ = """
  214. pandas - a powerful data analysis and manipulation library for Python
  215. =====================================================================
  216. **pandas** is a Python package providing fast, flexible, and expressive data
  217. structures designed to make working with "relational" or "labeled" data both
  218. easy and intuitive. It aims to be the fundamental high-level building block for
  219. doing practical, **real world** data analysis in Python. Additionally, it has
  220. the broader goal of becoming **the most powerful and flexible open source data
  221. analysis / manipulation tool available in any language**. It is already well on
  222. its way toward this goal.
  223. Main Features
  224. -------------
  225. Here are just a few of the things that pandas does well:
  226. - Easy handling of missing data in floating point as well as non-floating
  227. point data.
  228. - Size mutability: columns can be inserted and deleted from DataFrame and
  229. higher dimensional objects.
  230. - Automatic and explicit data alignment: objects can be explicitly aligned
  231. to a set of labels, or the user can simply ignore the labels and let
  232. `Series`, `DataFrame`, etc. automatically align the data for you in
  233. computations.
  234. - Powerful, flexible group by functionality to perform split-apply-combine
  235. operations on data sets, for both aggregating and transforming data.
  236. - Make it easy to convert ragged, differently-indexed data in other Python
  237. and NumPy data structures into DataFrame objects.
  238. - Intelligent label-based slicing, fancy indexing, and subsetting of large
  239. data sets.
  240. - Intuitive merging and joining data sets.
  241. - Flexible reshaping and pivoting of data sets.
  242. - Hierarchical labeling of axes (possible to have multiple labels per tick).
  243. - Robust IO tools for loading data from flat files (CSV and delimited),
  244. Excel files, databases, and saving/loading data from the ultrafast HDF5
  245. format.
  246. - Time series-specific functionality: date range generation and frequency
  247. conversion, moving window statistics, date shifting and lagging.
  248. """