PageRenderTime 50ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/pandas/tools/tests/test_pivot.py

https://github.com/kljensen/pandas
Python | 326 lines | 306 code | 18 blank | 2 comment | 4 complexity | ef0aacaf3cc683e39b4e26ce68d3ac60 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import unittest
  2. import numpy as np
  3. from pandas import DataFrame, Series
  4. from pandas.tools.merge import concat
  5. from pandas.tools.pivot import pivot_table, crosstab
  6. import pandas.util.testing as tm
  7. class TestPivotTable(unittest.TestCase):
  8. def setUp(self):
  9. self.data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
  10. 'bar', 'bar', 'bar', 'bar',
  11. 'foo', 'foo', 'foo'],
  12. 'B' : ['one', 'one', 'one', 'two',
  13. 'one', 'one', 'one', 'two',
  14. 'two', 'two', 'one'],
  15. 'C' : ['dull', 'dull', 'shiny', 'dull',
  16. 'dull', 'shiny', 'shiny', 'dull',
  17. 'shiny', 'shiny', 'shiny'],
  18. 'D' : np.random.randn(11),
  19. 'E' : np.random.randn(11),
  20. 'F' : np.random.randn(11)})
  21. def test_pivot_table(self):
  22. rows = ['A', 'B']
  23. cols= 'C'
  24. table = pivot_table(self.data, values='D', rows=rows, cols=cols)
  25. table2 = self.data.pivot_table(values='D', rows=rows, cols=cols)
  26. tm.assert_frame_equal(table, table2)
  27. # this works
  28. pivot_table(self.data, values='D', rows=rows)
  29. if len(rows) > 1:
  30. self.assertEqual(table.index.names, rows)
  31. else:
  32. self.assertEqual(table.index.name, rows[0])
  33. if len(cols) > 1:
  34. self.assertEqual(table.columns.names, cols)
  35. else:
  36. self.assertEqual(table.columns.name, cols[0])
  37. expected = self.data.groupby(rows + [cols])['D'].agg(np.mean).unstack()
  38. tm.assert_frame_equal(table, expected)
  39. def test_pass_array(self):
  40. result = self.data.pivot_table('D', rows=self.data.A, cols=self.data.C)
  41. expected = self.data.pivot_table('D', rows='A', cols='C')
  42. tm.assert_frame_equal(result, expected)
  43. def test_pass_function(self):
  44. result = self.data.pivot_table('D', rows=lambda x: x // 5,
  45. cols=self.data.C)
  46. expected = self.data.pivot_table('D', rows=self.data.index // 5,
  47. cols='C')
  48. tm.assert_frame_equal(result, expected)
  49. def test_pivot_table_multiple(self):
  50. rows = ['A', 'B']
  51. cols= 'C'
  52. table = pivot_table(self.data, rows=rows, cols=cols)
  53. expected = self.data.groupby(rows + [cols]).agg(np.mean).unstack()
  54. tm.assert_frame_equal(table, expected)
  55. def test_pivot_multi_values(self):
  56. result = pivot_table(self.data, values=['D', 'E'],
  57. rows='A', cols=['B', 'C'], fill_value=0)
  58. expected = pivot_table(self.data.drop(['F'], axis=1),
  59. rows='A', cols=['B', 'C'], fill_value=0)
  60. tm.assert_frame_equal(result, expected)
  61. def test_pivot_multi_functions(self):
  62. f = lambda func: pivot_table(self.data, values=['D', 'E'],
  63. rows=['A', 'B'], cols='C',
  64. aggfunc=func)
  65. result = f([np.mean, np.std])
  66. means = f(np.mean)
  67. stds = f(np.std)
  68. expected = concat([means, stds], keys=['mean', 'std'], axis=1)
  69. tm.assert_frame_equal(result, expected)
  70. # margins not supported??
  71. f = lambda func: pivot_table(self.data, values=['D', 'E'],
  72. rows=['A', 'B'], cols='C',
  73. aggfunc=func, margins=True)
  74. result = f([np.mean, np.std])
  75. means = f(np.mean)
  76. stds = f(np.std)
  77. expected = concat([means, stds], keys=['mean', 'std'], axis=1)
  78. tm.assert_frame_equal(result, expected)
  79. def test_margins(self):
  80. def _check_output(res, col, rows=['A', 'B'], cols=['C']):
  81. cmarg = res['All'][:-1]
  82. exp = self.data.groupby(rows)[col].mean()
  83. tm.assert_series_equal(cmarg, exp)
  84. rmarg = res.xs(('All', ''))[:-1]
  85. exp = self.data.groupby(cols)[col].mean()
  86. tm.assert_series_equal(rmarg, exp)
  87. gmarg = res['All']['All', '']
  88. exp = self.data[col].mean()
  89. self.assertEqual(gmarg, exp)
  90. # column specified
  91. table = self.data.pivot_table('D', rows=['A', 'B'], cols='C',
  92. margins=True, aggfunc=np.mean)
  93. _check_output(table, 'D')
  94. # no column specified
  95. table = self.data.pivot_table(rows=['A', 'B'], cols='C',
  96. margins=True, aggfunc=np.mean)
  97. for valcol in table.columns.levels[0]:
  98. _check_output(table[valcol], valcol)
  99. # no col
  100. # to help with a buglet
  101. self.data.columns = [k * 2 for k in self.data.columns]
  102. table = self.data.pivot_table(rows=['AA', 'BB'], margins=True,
  103. aggfunc=np.mean)
  104. for valcol in table.columns:
  105. gmarg = table[valcol]['All', '']
  106. self.assertEqual(gmarg, self.data[valcol].mean())
  107. # this is OK
  108. table = self.data.pivot_table(rows=['AA', 'BB'], margins=True,
  109. aggfunc='mean')
  110. # no rows
  111. rtable = self.data.pivot_table(cols=['AA', 'BB'], margins=True,
  112. aggfunc=np.mean)
  113. self.assert_(isinstance(rtable, Series))
  114. for item in ['DD', 'EE', 'FF']:
  115. gmarg = table[item]['All', '']
  116. self.assertEqual(gmarg, self.data[item].mean())
  117. def test_pivot_integer_columns(self):
  118. # caused by upstream bug in unstack
  119. from pandas.util.compat import product
  120. import datetime
  121. import pandas
  122. d = datetime.date.min
  123. data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'],
  124. [d + datetime.timedelta(i) for i in xrange(20)], [1.0]))
  125. df = pandas.DataFrame(data)
  126. table = df.pivot_table(values=4, rows=[0,1,3],cols=[2])
  127. df2 = df.rename(columns=str)
  128. table2 = df2.pivot_table(values='4', rows=['0','1','3'], cols=['2'])
  129. tm.assert_frame_equal(table, table2)
  130. def test_pivot_no_level_overlap(self):
  131. # GH #1181
  132. data = DataFrame({'a': ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'] * 2,
  133. 'b': [0, 0, 0, 0, 1, 1, 1, 1] * 2,
  134. 'c': (['foo'] * 4 + ['bar'] * 4) * 2,
  135. 'value': np.random.randn(16)})
  136. table = data.pivot_table('value', rows='a', cols=['b', 'c'])
  137. grouped = data.groupby(['a', 'b', 'c'])['value'].mean()
  138. expected = grouped.unstack('b').unstack('c').dropna(axis=1, how='all')
  139. tm.assert_frame_equal(table, expected)
  140. def test_pivot_columns_lexsorted(self):
  141. import datetime
  142. import numpy as np
  143. import pandas
  144. n = 10000
  145. dtype = np.dtype([
  146. ("Index", object),
  147. ("Symbol", object),
  148. ("Year", int),
  149. ("Month", int),
  150. ("Day", int),
  151. ("Quantity", int),
  152. ("Price", float),
  153. ])
  154. products = np.array([
  155. ('SP500', 'ADBE'),
  156. ('SP500', 'NVDA'),
  157. ('SP500', 'ORCL'),
  158. ('NDQ100', 'AAPL'),
  159. ('NDQ100', 'MSFT'),
  160. ('NDQ100', 'GOOG'),
  161. ('FTSE', 'DGE.L'),
  162. ('FTSE', 'TSCO.L'),
  163. ('FTSE', 'GSK.L'),
  164. ], dtype=[('Index', object), ('Symbol', object)])
  165. items = np.empty(n, dtype=dtype)
  166. iproduct = np.random.randint(0, len(products), n)
  167. items['Index'] = products['Index'][iproduct]
  168. items['Symbol'] = products['Symbol'][iproduct]
  169. dr = pandas.date_range(datetime.date(2000, 1, 1),
  170. datetime.date(2010, 12, 31))
  171. dates = dr[np.random.randint(0, len(dr), n)]
  172. items['Year'] = dates.year
  173. items['Month'] = dates.month
  174. items['Day'] = dates.day
  175. items['Price'] = np.random.lognormal(4.0, 2.0, n)
  176. df = DataFrame(items)
  177. pivoted = df.pivot_table('Price', rows=['Month', 'Day'],
  178. cols=['Index', 'Symbol', 'Year'],
  179. aggfunc='mean')
  180. self.assert_(pivoted.columns.is_monotonic)
  181. class TestCrosstab(unittest.TestCase):
  182. def setUp(self):
  183. df = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
  184. 'bar', 'bar', 'bar', 'bar',
  185. 'foo', 'foo', 'foo'],
  186. 'B' : ['one', 'one', 'one', 'two',
  187. 'one', 'one', 'one', 'two',
  188. 'two', 'two', 'one'],
  189. 'C' : ['dull', 'dull', 'shiny', 'dull',
  190. 'dull', 'shiny', 'shiny', 'dull',
  191. 'shiny', 'shiny', 'shiny'],
  192. 'D' : np.random.randn(11),
  193. 'E' : np.random.randn(11),
  194. 'F' : np.random.randn(11)})
  195. self.df = df.append(df, ignore_index=True)
  196. def test_crosstab_single(self):
  197. df = self.df
  198. result = crosstab(df['A'], df['C'])
  199. expected = df.groupby(['A', 'C']).size().unstack()
  200. tm.assert_frame_equal(result, expected.fillna(0).astype(np.int64))
  201. def test_crosstab_multiple(self):
  202. df = self.df
  203. result = crosstab(df['A'], [df['B'], df['C']])
  204. expected = df.groupby(['A', 'B', 'C']).size()
  205. expected = expected.unstack('B').unstack('C').fillna(0).astype(np.int64)
  206. tm.assert_frame_equal(result, expected)
  207. result = crosstab([df['B'], df['C']], df['A'])
  208. expected = df.groupby(['B', 'C', 'A']).size()
  209. expected = expected.unstack('A').fillna(0).astype(np.int64)
  210. tm.assert_frame_equal(result, expected)
  211. def test_crosstab_ndarray(self):
  212. a = np.random.randint(0, 5, size=100)
  213. b = np.random.randint(0, 3, size=100)
  214. c = np.random.randint(0, 10, size=100)
  215. df = DataFrame({'a': a, 'b': b, 'c': c})
  216. result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'))
  217. expected = crosstab(df['a'], [df['b'], df['c']])
  218. tm.assert_frame_equal(result, expected)
  219. result = crosstab([b, c], a, colnames=['a'], rownames=('b', 'c'))
  220. expected = crosstab([df['b'], df['c']], df['a'])
  221. tm.assert_frame_equal(result, expected)
  222. # assign arbitrary names
  223. result = crosstab(self.df['A'].values, self.df['C'].values)
  224. self.assertEqual(result.index.name, 'row_0')
  225. self.assertEqual(result.columns.name, 'col_0')
  226. def test_crosstab_margins(self):
  227. a = np.random.randint(0, 7, size=100)
  228. b = np.random.randint(0, 3, size=100)
  229. c = np.random.randint(0, 5, size=100)
  230. df = DataFrame({'a': a, 'b': b, 'c': c})
  231. result = crosstab(a, [b, c], rownames=['a'], colnames=('b', 'c'),
  232. margins=True)
  233. self.assertEqual(result.index.names, ['a'])
  234. self.assertEqual(result.columns.names, ['b', 'c'])
  235. all_cols = result['All', '']
  236. exp_cols = df.groupby(['a']).size().astype('i8')
  237. exp_cols = exp_cols.append(Series([len(df)], index=['All']))
  238. tm.assert_series_equal(all_cols, exp_cols)
  239. all_rows = result.ix['All']
  240. exp_rows = df.groupby(['b', 'c']).size().astype('i8')
  241. exp_rows = exp_rows.append(Series([len(df)], index=[('All', '')]))
  242. exp_rows = exp_rows.reindex(all_rows.index)
  243. exp_rows = exp_rows.fillna(0).astype(np.int64)
  244. tm.assert_series_equal(all_rows, exp_rows)
  245. def test_crosstab_pass_values(self):
  246. a = np.random.randint(0, 7, size=100)
  247. b = np.random.randint(0, 3, size=100)
  248. c = np.random.randint(0, 5, size=100)
  249. values = np.random.randn(100)
  250. table = crosstab([a, b], c, values, aggfunc=np.sum,
  251. rownames=['foo', 'bar'], colnames=['baz'])
  252. df = DataFrame({'foo': a, 'bar': b, 'baz': c, 'values' : values})
  253. expected = df.pivot_table('values', rows=['foo', 'bar'], cols='baz',
  254. aggfunc=np.sum)
  255. tm.assert_frame_equal(table, expected)
  256. if __name__ == '__main__':
  257. import nose
  258. nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
  259. exit=False)