PageRenderTime 549ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/groupby/test_nth.py

https://github.com/neurodebian/pandas
Python | 253 lines | 193 code | 37 blank | 23 comment | 1 complexity | 4af525847a2cc3e2af2dcc146f25ff72 MD5 | raw file
  1. import numpy as np
  2. import pandas as pd
  3. from pandas import DataFrame, MultiIndex, Index, Series, isna
  4. from pandas.compat import lrange
  5. from pandas.util.testing import (
  6. assert_frame_equal,
  7. assert_produces_warning,
  8. assert_series_equal)
  9. from .common import MixIn
  10. class TestNth(MixIn):
  11. def test_first_last_nth(self):
  12. # tests for first / last / nth
  13. grouped = self.df.groupby('A')
  14. first = grouped.first()
  15. expected = self.df.loc[[1, 0], ['B', 'C', 'D']]
  16. expected.index = Index(['bar', 'foo'], name='A')
  17. expected = expected.sort_index()
  18. assert_frame_equal(first, expected)
  19. nth = grouped.nth(0)
  20. assert_frame_equal(nth, expected)
  21. last = grouped.last()
  22. expected = self.df.loc[[5, 7], ['B', 'C', 'D']]
  23. expected.index = Index(['bar', 'foo'], name='A')
  24. assert_frame_equal(last, expected)
  25. nth = grouped.nth(-1)
  26. assert_frame_equal(nth, expected)
  27. nth = grouped.nth(1)
  28. expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy()
  29. expected.index = Index(['foo', 'bar'], name='A')
  30. expected = expected.sort_index()
  31. assert_frame_equal(nth, expected)
  32. # it works!
  33. grouped['B'].first()
  34. grouped['B'].last()
  35. grouped['B'].nth(0)
  36. self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan
  37. assert isna(grouped['B'].first()['foo'])
  38. assert isna(grouped['B'].last()['foo'])
  39. assert isna(grouped['B'].nth(0)['foo'])
  40. # v0.14.0 whatsnew
  41. df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
  42. g = df.groupby('A')
  43. result = g.first()
  44. expected = df.iloc[[1, 2]].set_index('A')
  45. assert_frame_equal(result, expected)
  46. expected = df.iloc[[1, 2]].set_index('A')
  47. result = g.nth(0, dropna='any')
  48. assert_frame_equal(result, expected)
  49. def test_first_last_nth_dtypes(self):
  50. df = self.df_mixed_floats.copy()
  51. df['E'] = True
  52. df['F'] = 1
  53. # tests for first / last / nth
  54. grouped = df.groupby('A')
  55. first = grouped.first()
  56. expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']]
  57. expected.index = Index(['bar', 'foo'], name='A')
  58. expected = expected.sort_index()
  59. assert_frame_equal(first, expected)
  60. last = grouped.last()
  61. expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']]
  62. expected.index = Index(['bar', 'foo'], name='A')
  63. expected = expected.sort_index()
  64. assert_frame_equal(last, expected)
  65. nth = grouped.nth(1)
  66. expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']]
  67. expected.index = Index(['bar', 'foo'], name='A')
  68. expected = expected.sort_index()
  69. assert_frame_equal(nth, expected)
  70. # GH 2763, first/last shifting dtypes
  71. idx = lrange(10)
  72. idx.append(9)
  73. s = Series(data=lrange(11), index=idx, name='IntCol')
  74. assert s.dtype == 'int64'
  75. f = s.groupby(level=0).first()
  76. assert f.dtype == 'int64'
  77. def test_nth(self):
  78. df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
  79. g = df.groupby('A')
  80. assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
  81. assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
  82. assert_frame_equal(g.nth(2), df.loc[[]].set_index('A'))
  83. assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
  84. assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
  85. assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A'))
  86. assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]])
  87. assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]])
  88. assert_frame_equal(g[['B']].nth(0),
  89. df.loc[[0, 2], ['A', 'B']].set_index('A'))
  90. exp = df.set_index('A')
  91. assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])
  92. assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]])
  93. exp['B'] = np.nan
  94. assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]])
  95. assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]])
  96. # out of bounds, regression from 0.13.1
  97. # GH 6621
  98. df = DataFrame({'color': {0: 'green',
  99. 1: 'green',
  100. 2: 'red',
  101. 3: 'red',
  102. 4: 'red'},
  103. 'food': {0: 'ham',
  104. 1: 'eggs',
  105. 2: 'eggs',
  106. 3: 'ham',
  107. 4: 'pork'},
  108. 'two': {0: 1.5456590000000001,
  109. 1: -0.070345000000000005,
  110. 2: -2.4004539999999999,
  111. 3: 0.46206000000000003,
  112. 4: 0.52350799999999997},
  113. 'one': {0: 0.56573799999999996,
  114. 1: -0.9742360000000001,
  115. 2: 1.033801,
  116. 3: -0.78543499999999999,
  117. 4: 0.70422799999999997}}).set_index(['color',
  118. 'food'])
  119. result = df.groupby(level=0, as_index=False).nth(2)
  120. expected = df.iloc[[-1]]
  121. assert_frame_equal(result, expected)
  122. result = df.groupby(level=0, as_index=False).nth(3)
  123. expected = df.loc[[]]
  124. assert_frame_equal(result, expected)
  125. # GH 7559
  126. # from the vbench
  127. df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64')
  128. s = df[1]
  129. g = df[0]
  130. expected = s.groupby(g).first()
  131. expected2 = s.groupby(g).apply(lambda x: x.iloc[0])
  132. assert_series_equal(expected2, expected, check_names=False)
  133. assert expected.name == 1
  134. assert expected2.name == 1
  135. # validate first
  136. v = s[g == 1].iloc[0]
  137. assert expected.iloc[0] == v
  138. assert expected2.iloc[0] == v
  139. # this is NOT the same as .first (as sorted is default!)
  140. # as it keeps the order in the series (and not the group order)
  141. # related GH 7287
  142. expected = s.groupby(g, sort=False).first()
  143. result = s.groupby(g, sort=False).nth(0, dropna='all')
  144. assert_series_equal(result, expected)
  145. # doc example
  146. df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
  147. g = df.groupby('A')
  148. # PR 17493, related to issue 11038
  149. # test Series.nth with True for dropna produces DeprecationWarning
  150. with assert_produces_warning(FutureWarning):
  151. result = g.B.nth(0, dropna=True)
  152. expected = g.B.first()
  153. assert_series_equal(result, expected)
  154. # test multiple nth values
  155. df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]],
  156. columns=['A', 'B'])
  157. g = df.groupby('A')
  158. assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A'))
  159. assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A'))
  160. assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A'))
  161. assert_frame_equal(
  162. g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A'))
  163. assert_frame_equal(
  164. g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
  165. assert_frame_equal(
  166. g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A'))
  167. assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A'))
  168. assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A'))
  169. business_dates = pd.date_range(start='4/1/2014', end='6/30/2014',
  170. freq='B')
  171. df = DataFrame(1, index=business_dates, columns=['a', 'b'])
  172. # get the first, fourth and last two business days for each month
  173. key = (df.index.year, df.index.month)
  174. result = df.groupby(key, as_index=False).nth([0, 3, -2, -1])
  175. expected_dates = pd.to_datetime(
  176. ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1',
  177. '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5',
  178. '2014/6/27', '2014/6/30'])
  179. expected = DataFrame(1, columns=['a', 'b'], index=expected_dates)
  180. assert_frame_equal(result, expected)
  181. def test_nth_multi_index(self):
  182. # PR 9090, related to issue 8979
  183. # test nth on MultiIndex, should match .first()
  184. grouped = self.three_group.groupby(['A', 'B'])
  185. result = grouped.nth(0)
  186. expected = grouped.first()
  187. assert_frame_equal(result, expected)
  188. def test_nth_multi_index_as_expected(self):
  189. # PR 9090, related to issue 8979
  190. # test nth on MultiIndex
  191. three_group = DataFrame(
  192. {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar',
  193. 'foo', 'foo', 'foo'],
  194. 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two',
  195. 'two', 'two', 'one'],
  196. 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny',
  197. 'dull', 'shiny', 'shiny', 'shiny']})
  198. grouped = three_group.groupby(['A', 'B'])
  199. result = grouped.nth(0)
  200. expected = DataFrame(
  201. {'C': ['dull', 'dull', 'dull', 'dull']},
  202. index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'],
  203. ['one', 'two', 'one', 'two']],
  204. names=['A', 'B']))
  205. assert_frame_equal(result, expected)
  206. def test_nth_empty():
  207. # GH 16064
  208. df = DataFrame(index=[0], columns=['a', 'b', 'c'])
  209. result = df.groupby('a').nth(10)
  210. expected = DataFrame(index=Index([], name='a'), columns=['b', 'c'])
  211. assert_frame_equal(result, expected)
  212. result = df.groupby(['a', 'b']).nth(10)
  213. expected = DataFrame(index=MultiIndex([[], []], [[], []],
  214. names=['a', 'b']),
  215. columns=['c'])
  216. assert_frame_equal(result, expected)