PageRenderTime 42ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/core/tools/timedeltas.py

https://github.com/neurodebian/pandas
Python | 191 lines | 167 code | 5 blank | 19 comment | 2 complexity | 4e00a3ece4200e57d3007e9d91f0b3ee MD5 | raw file
  1. """
  2. timedelta support tools
  3. """
  4. import numpy as np
  5. import pandas as pd
  6. import pandas._libs.tslib as tslib
  7. from pandas.core.dtypes.common import (
  8. _ensure_object,
  9. is_integer_dtype,
  10. is_timedelta64_dtype,
  11. is_list_like)
  12. from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
  13. def to_timedelta(arg, unit='ns', box=True, errors='raise'):
  14. """
  15. Convert argument to timedelta
  16. Parameters
  17. ----------
  18. arg : string, timedelta, list, tuple, 1-d array, or Series
  19. unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, which is an
  20. integer/float number
  21. box : boolean, default True
  22. - If True returns a Timedelta/TimedeltaIndex of the results
  23. - if False returns a np.timedelta64 or ndarray of values of dtype
  24. timedelta64[ns]
  25. errors : {'ignore', 'raise', 'coerce'}, default 'raise'
  26. - If 'raise', then invalid parsing will raise an exception
  27. - If 'coerce', then invalid parsing will be set as NaT
  28. - If 'ignore', then invalid parsing will return the input
  29. Returns
  30. -------
  31. ret : timedelta64/arrays of timedelta64 if parsing succeeded
  32. Examples
  33. --------
  34. Parsing a single string to a Timedelta:
  35. >>> pd.to_timedelta('1 days 06:05:01.00003')
  36. Timedelta('1 days 06:05:01.000030')
  37. >>> pd.to_timedelta('15.5us')
  38. Timedelta('0 days 00:00:00.000015')
  39. Parsing a list or array of strings:
  40. >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
  41. TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015', NaT],
  42. dtype='timedelta64[ns]', freq=None)
  43. Converting numbers by specifying the `unit` keyword argument:
  44. >>> pd.to_timedelta(np.arange(5), unit='s')
  45. TimedeltaIndex(['00:00:00', '00:00:01', '00:00:02',
  46. '00:00:03', '00:00:04'],
  47. dtype='timedelta64[ns]', freq=None)
  48. >>> pd.to_timedelta(np.arange(5), unit='d')
  49. TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
  50. dtype='timedelta64[ns]', freq=None)
  51. See also
  52. --------
  53. pandas.DataFrame.astype : Cast argument to a specified dtype.
  54. pandas.to_datetime : Convert argument to datetime.
  55. """
  56. unit = _validate_timedelta_unit(unit)
  57. if errors not in ('ignore', 'raise', 'coerce'):
  58. raise ValueError("errors must be one of 'ignore', "
  59. "'raise', or 'coerce'}")
  60. if arg is None:
  61. return arg
  62. elif isinstance(arg, ABCSeries):
  63. from pandas import Series
  64. values = _convert_listlike(arg._values, unit=unit,
  65. box=False, errors=errors)
  66. return Series(values, index=arg.index, name=arg.name)
  67. elif isinstance(arg, ABCIndexClass):
  68. return _convert_listlike(arg, unit=unit, box=box,
  69. errors=errors, name=arg.name)
  70. elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 0:
  71. # extract array scalar and process below
  72. arg = arg.item()
  73. elif is_list_like(arg) and getattr(arg, 'ndim', 1) == 1:
  74. return _convert_listlike(arg, unit=unit, box=box, errors=errors)
  75. elif getattr(arg, 'ndim', 1) > 1:
  76. raise TypeError('arg must be a string, timedelta, list, tuple, '
  77. '1-d array, or Series')
  78. # ...so it must be a scalar value. Return scalar.
  79. return _coerce_scalar_to_timedelta_type(arg, unit=unit,
  80. box=box, errors=errors)
  81. _unit_map = {
  82. 'Y': 'Y',
  83. 'y': 'Y',
  84. 'W': 'W',
  85. 'w': 'W',
  86. 'D': 'D',
  87. 'd': 'D',
  88. 'days': 'D',
  89. 'Days': 'D',
  90. 'day': 'D',
  91. 'Day': 'D',
  92. 'M': 'M',
  93. 'H': 'h',
  94. 'h': 'h',
  95. 'm': 'm',
  96. 'T': 'm',
  97. 'S': 's',
  98. 's': 's',
  99. 'L': 'ms',
  100. 'MS': 'ms',
  101. 'ms': 'ms',
  102. 'US': 'us',
  103. 'us': 'us',
  104. 'NS': 'ns',
  105. 'ns': 'ns',
  106. }
  107. def _validate_timedelta_unit(arg):
  108. """ provide validation / translation for timedelta short units """
  109. try:
  110. return _unit_map[arg]
  111. except:
  112. if arg is None:
  113. return 'ns'
  114. raise ValueError("invalid timedelta unit {arg} provided"
  115. .format(arg=arg))
  116. def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'):
  117. """Convert string 'r' to a timedelta object."""
  118. try:
  119. result = tslib.convert_to_timedelta64(r, unit)
  120. except ValueError:
  121. if errors == 'raise':
  122. raise
  123. elif errors == 'ignore':
  124. return r
  125. # coerce
  126. result = pd.NaT
  127. if box:
  128. result = tslib.Timedelta(result)
  129. return result
  130. def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
  131. """Convert a list of objects to a timedelta index object."""
  132. if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
  133. arg = np.array(list(arg), dtype='O')
  134. # these are shortcut-able
  135. if is_timedelta64_dtype(arg):
  136. value = arg.astype('timedelta64[ns]')
  137. elif is_integer_dtype(arg):
  138. value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype(
  139. 'timedelta64[ns]', copy=False)
  140. else:
  141. try:
  142. value = tslib.array_to_timedelta64(_ensure_object(arg),
  143. unit=unit, errors=errors)
  144. value = value.astype('timedelta64[ns]', copy=False)
  145. except ValueError:
  146. if errors == 'ignore':
  147. return arg
  148. else:
  149. # This else-block accounts for the cases when errors='raise'
  150. # and errors='coerce'. If errors == 'raise', these errors
  151. # should be raised. If errors == 'coerce', we shouldn't
  152. # expect any errors to be raised, since all parsing errors
  153. # cause coercion to pd.NaT. However, if an error / bug is
  154. # introduced that causes an Exception to be raised, we would
  155. # like to surface it.
  156. raise
  157. if box:
  158. from pandas import TimedeltaIndex
  159. value = TimedeltaIndex(value, unit='ns', name=name)
  160. return value