PageRenderTime 43ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/test_take.py

http://github.com/wesm/pandas
Python | 468 lines | 455 code | 9 blank | 4 comment | 1 complexity | 444eb52978bb9990f8b07e6dc38d827f MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. from datetime import datetime
  3. import re
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.tslib import iNaT
  7. from pandas.compat import long
  8. import pandas.core.algorithms as algos
  9. import pandas.util.testing as tm
  10. @pytest.fixture(params=[True, False])
  11. def writeable(request):
  12. return request.param
# The ``writeable`` fixture above lets the tests check that take_nd works
# both with writeable arrays (in which case the fast typed memory-view
# implementation is used) and with read-only arrays.
  16. @pytest.fixture(params=[
  17. (np.float64, True),
  18. (np.float32, True),
  19. (np.uint64, False),
  20. (np.uint32, False),
  21. (np.uint16, False),
  22. (np.uint8, False),
  23. (np.int64, False),
  24. (np.int32, False),
  25. (np.int16, False),
  26. (np.int8, False),
  27. (np.object_, True),
  28. (np.bool, False),
  29. ])
  30. def dtype_can_hold_na(request):
  31. return request.param
  32. @pytest.fixture(params=[
  33. (np.int8, np.int16(127), np.int8),
  34. (np.int8, np.int16(128), np.int16),
  35. (np.int32, 1, np.int32),
  36. (np.int32, 2.0, np.float64),
  37. (np.int32, 3.0 + 4.0j, np.complex128),
  38. (np.int32, True, np.object_),
  39. (np.int32, "", np.object_),
  40. (np.float64, 1, np.float64),
  41. (np.float64, 2.0, np.float64),
  42. (np.float64, 3.0 + 4.0j, np.complex128),
  43. (np.float64, True, np.object_),
  44. (np.float64, "", np.object_),
  45. (np.complex128, 1, np.complex128),
  46. (np.complex128, 2.0, np.complex128),
  47. (np.complex128, 3.0 + 4.0j, np.complex128),
  48. (np.complex128, True, np.object_),
  49. (np.complex128, "", np.object_),
  50. (np.bool_, 1, np.object_),
  51. (np.bool_, 2.0, np.object_),
  52. (np.bool_, 3.0 + 4.0j, np.object_),
  53. (np.bool_, True, np.bool_),
  54. (np.bool_, '', np.object_),
  55. ])
  56. def dtype_fill_out_dtype(request):
  57. return request.param
  58. class TestTake(object):
  59. # Standard incompatible fill error.
  60. fill_error = re.compile("Incompatible type for fill_value")
  61. def test_1d_with_out(self, dtype_can_hold_na, writeable):
  62. dtype, can_hold_na = dtype_can_hold_na
  63. data = np.random.randint(0, 2, 4).astype(dtype)
  64. data.flags.writeable = writeable
  65. indexer = [2, 1, 0, 1]
  66. out = np.empty(4, dtype=dtype)
  67. algos.take_1d(data, indexer, out=out)
  68. expected = data.take(indexer)
  69. tm.assert_almost_equal(out, expected)
  70. indexer = [2, 1, 0, -1]
  71. out = np.empty(4, dtype=dtype)
  72. if can_hold_na:
  73. algos.take_1d(data, indexer, out=out)
  74. expected = data.take(indexer)
  75. expected[3] = np.nan
  76. tm.assert_almost_equal(out, expected)
  77. else:
  78. with pytest.raises(TypeError, match=self.fill_error):
  79. algos.take_1d(data, indexer, out=out)
  80. # No Exception otherwise.
  81. data.take(indexer, out=out)
  82. def test_1d_fill_nonna(self, dtype_fill_out_dtype):
  83. dtype, fill_value, out_dtype = dtype_fill_out_dtype
  84. data = np.random.randint(0, 2, 4).astype(dtype)
  85. indexer = [2, 1, 0, -1]
  86. result = algos.take_1d(data, indexer, fill_value=fill_value)
  87. assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all())
  88. assert (result[3] == fill_value)
  89. assert (result.dtype == out_dtype)
  90. indexer = [2, 1, 0, 1]
  91. result = algos.take_1d(data, indexer, fill_value=fill_value)
  92. assert ((result[[0, 1, 2, 3]] == data[indexer]).all())
  93. assert (result.dtype == dtype)
  94. def test_2d_with_out(self, dtype_can_hold_na, writeable):
  95. dtype, can_hold_na = dtype_can_hold_na
  96. data = np.random.randint(0, 2, (5, 3)).astype(dtype)
  97. data.flags.writeable = writeable
  98. indexer = [2, 1, 0, 1]
  99. out0 = np.empty((4, 3), dtype=dtype)
  100. out1 = np.empty((5, 4), dtype=dtype)
  101. algos.take_nd(data, indexer, out=out0, axis=0)
  102. algos.take_nd(data, indexer, out=out1, axis=1)
  103. expected0 = data.take(indexer, axis=0)
  104. expected1 = data.take(indexer, axis=1)
  105. tm.assert_almost_equal(out0, expected0)
  106. tm.assert_almost_equal(out1, expected1)
  107. indexer = [2, 1, 0, -1]
  108. out0 = np.empty((4, 3), dtype=dtype)
  109. out1 = np.empty((5, 4), dtype=dtype)
  110. if can_hold_na:
  111. algos.take_nd(data, indexer, out=out0, axis=0)
  112. algos.take_nd(data, indexer, out=out1, axis=1)
  113. expected0 = data.take(indexer, axis=0)
  114. expected1 = data.take(indexer, axis=1)
  115. expected0[3, :] = np.nan
  116. expected1[:, 3] = np.nan
  117. tm.assert_almost_equal(out0, expected0)
  118. tm.assert_almost_equal(out1, expected1)
  119. else:
  120. for i, out in enumerate([out0, out1]):
  121. with pytest.raises(TypeError, match=self.fill_error):
  122. algos.take_nd(data, indexer, out=out, axis=i)
  123. # No Exception otherwise.
  124. data.take(indexer, out=out, axis=i)
  125. def test_2d_fill_nonna(self, dtype_fill_out_dtype):
  126. dtype, fill_value, out_dtype = dtype_fill_out_dtype
  127. data = np.random.randint(0, 2, (5, 3)).astype(dtype)
  128. indexer = [2, 1, 0, -1]
  129. result = algos.take_nd(data, indexer, axis=0,
  130. fill_value=fill_value)
  131. assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all())
  132. assert ((result[3, :] == fill_value).all())
  133. assert (result.dtype == out_dtype)
  134. result = algos.take_nd(data, indexer, axis=1,
  135. fill_value=fill_value)
  136. assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all())
  137. assert ((result[:, 3] == fill_value).all())
  138. assert (result.dtype == out_dtype)
  139. indexer = [2, 1, 0, 1]
  140. result = algos.take_nd(data, indexer, axis=0,
  141. fill_value=fill_value)
  142. assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all())
  143. assert (result.dtype == dtype)
  144. result = algos.take_nd(data, indexer, axis=1,
  145. fill_value=fill_value)
  146. assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all())
  147. assert (result.dtype == dtype)
  148. def test_3d_with_out(self, dtype_can_hold_na):
  149. dtype, can_hold_na = dtype_can_hold_na
  150. data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype)
  151. indexer = [2, 1, 0, 1]
  152. out0 = np.empty((4, 4, 3), dtype=dtype)
  153. out1 = np.empty((5, 4, 3), dtype=dtype)
  154. out2 = np.empty((5, 4, 4), dtype=dtype)
  155. algos.take_nd(data, indexer, out=out0, axis=0)
  156. algos.take_nd(data, indexer, out=out1, axis=1)
  157. algos.take_nd(data, indexer, out=out2, axis=2)
  158. expected0 = data.take(indexer, axis=0)
  159. expected1 = data.take(indexer, axis=1)
  160. expected2 = data.take(indexer, axis=2)
  161. tm.assert_almost_equal(out0, expected0)
  162. tm.assert_almost_equal(out1, expected1)
  163. tm.assert_almost_equal(out2, expected2)
  164. indexer = [2, 1, 0, -1]
  165. out0 = np.empty((4, 4, 3), dtype=dtype)
  166. out1 = np.empty((5, 4, 3), dtype=dtype)
  167. out2 = np.empty((5, 4, 4), dtype=dtype)
  168. if can_hold_na:
  169. algos.take_nd(data, indexer, out=out0, axis=0)
  170. algos.take_nd(data, indexer, out=out1, axis=1)
  171. algos.take_nd(data, indexer, out=out2, axis=2)
  172. expected0 = data.take(indexer, axis=0)
  173. expected1 = data.take(indexer, axis=1)
  174. expected2 = data.take(indexer, axis=2)
  175. expected0[3, :, :] = np.nan
  176. expected1[:, 3, :] = np.nan
  177. expected2[:, :, 3] = np.nan
  178. tm.assert_almost_equal(out0, expected0)
  179. tm.assert_almost_equal(out1, expected1)
  180. tm.assert_almost_equal(out2, expected2)
  181. else:
  182. for i, out in enumerate([out0, out1, out2]):
  183. with pytest.raises(TypeError, match=self.fill_error):
  184. algos.take_nd(data, indexer, out=out, axis=i)
  185. # No Exception otherwise.
  186. data.take(indexer, out=out, axis=i)
  187. def test_3d_fill_nonna(self, dtype_fill_out_dtype):
  188. dtype, fill_value, out_dtype = dtype_fill_out_dtype
  189. data = np.random.randint(0, 2, (5, 4, 3)).astype(dtype)
  190. indexer = [2, 1, 0, -1]
  191. result = algos.take_nd(data, indexer, axis=0,
  192. fill_value=fill_value)
  193. assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all())
  194. assert ((result[3, :, :] == fill_value).all())
  195. assert (result.dtype == out_dtype)
  196. result = algos.take_nd(data, indexer, axis=1,
  197. fill_value=fill_value)
  198. assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all())
  199. assert ((result[:, 3, :] == fill_value).all())
  200. assert (result.dtype == out_dtype)
  201. result = algos.take_nd(data, indexer, axis=2,
  202. fill_value=fill_value)
  203. assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all())
  204. assert ((result[:, :, 3] == fill_value).all())
  205. assert (result.dtype == out_dtype)
  206. indexer = [2, 1, 0, 1]
  207. result = algos.take_nd(data, indexer, axis=0,
  208. fill_value=fill_value)
  209. assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all())
  210. assert (result.dtype == dtype)
  211. result = algos.take_nd(data, indexer, axis=1,
  212. fill_value=fill_value)
  213. assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all())
  214. assert (result.dtype == dtype)
  215. result = algos.take_nd(data, indexer, axis=2,
  216. fill_value=fill_value)
  217. assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all())
  218. assert (result.dtype == dtype)
  219. def test_1d_other_dtypes(self):
  220. arr = np.random.randn(10).astype(np.float32)
  221. indexer = [1, 2, 3, -1]
  222. result = algos.take_1d(arr, indexer)
  223. expected = arr.take(indexer)
  224. expected[-1] = np.nan
  225. tm.assert_almost_equal(result, expected)
  226. def test_2d_other_dtypes(self):
  227. arr = np.random.randn(10, 5).astype(np.float32)
  228. indexer = [1, 2, 3, -1]
  229. # axis=0
  230. result = algos.take_nd(arr, indexer, axis=0)
  231. expected = arr.take(indexer, axis=0)
  232. expected[-1] = np.nan
  233. tm.assert_almost_equal(result, expected)
  234. # axis=1
  235. result = algos.take_nd(arr, indexer, axis=1)
  236. expected = arr.take(indexer, axis=1)
  237. expected[:, -1] = np.nan
  238. tm.assert_almost_equal(result, expected)
  239. def test_1d_bool(self):
  240. arr = np.array([0, 1, 0], dtype=bool)
  241. result = algos.take_1d(arr, [0, 2, 2, 1])
  242. expected = arr.take([0, 2, 2, 1])
  243. tm.assert_numpy_array_equal(result, expected)
  244. result = algos.take_1d(arr, [0, 2, -1])
  245. assert result.dtype == np.object_
  246. def test_2d_bool(self):
  247. arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool)
  248. result = algos.take_nd(arr, [0, 2, 2, 1])
  249. expected = arr.take([0, 2, 2, 1], axis=0)
  250. tm.assert_numpy_array_equal(result, expected)
  251. result = algos.take_nd(arr, [0, 2, 2, 1], axis=1)
  252. expected = arr.take([0, 2, 2, 1], axis=1)
  253. tm.assert_numpy_array_equal(result, expected)
  254. result = algos.take_nd(arr, [0, 2, -1])
  255. assert result.dtype == np.object_
  256. def test_2d_float32(self):
  257. arr = np.random.randn(4, 3).astype(np.float32)
  258. indexer = [0, 2, -1, 1, -1]
  259. # axis=0
  260. result = algos.take_nd(arr, indexer, axis=0)
  261. result2 = np.empty_like(result)
  262. algos.take_nd(arr, indexer, axis=0, out=result2)
  263. tm.assert_almost_equal(result, result2)
  264. expected = arr.take(indexer, axis=0)
  265. expected[[2, 4], :] = np.nan
  266. tm.assert_almost_equal(result, expected)
  267. # this now accepts a float32! # test with float64 out buffer
  268. out = np.empty((len(indexer), arr.shape[1]), dtype='float32')
  269. algos.take_nd(arr, indexer, out=out) # it works!
  270. # axis=1
  271. result = algos.take_nd(arr, indexer, axis=1)
  272. result2 = np.empty_like(result)
  273. algos.take_nd(arr, indexer, axis=1, out=result2)
  274. tm.assert_almost_equal(result, result2)
  275. expected = arr.take(indexer, axis=1)
  276. expected[:, [2, 4]] = np.nan
  277. tm.assert_almost_equal(result, expected)
  278. def test_2d_datetime64(self):
  279. # 2005/01/01 - 2006/01/01
  280. arr = np.random.randint(
  281. long(11045376), long(11360736), (5, 3)) * 100000000000
  282. arr = arr.view(dtype='datetime64[ns]')
  283. indexer = [0, 2, -1, 1, -1]
  284. # axis=0
  285. result = algos.take_nd(arr, indexer, axis=0)
  286. result2 = np.empty_like(result)
  287. algos.take_nd(arr, indexer, axis=0, out=result2)
  288. tm.assert_almost_equal(result, result2)
  289. expected = arr.take(indexer, axis=0)
  290. expected.view(np.int64)[[2, 4], :] = iNaT
  291. tm.assert_almost_equal(result, expected)
  292. result = algos.take_nd(arr, indexer, axis=0,
  293. fill_value=datetime(2007, 1, 1))
  294. result2 = np.empty_like(result)
  295. algos.take_nd(arr, indexer, out=result2, axis=0,
  296. fill_value=datetime(2007, 1, 1))
  297. tm.assert_almost_equal(result, result2)
  298. expected = arr.take(indexer, axis=0)
  299. expected[[2, 4], :] = datetime(2007, 1, 1)
  300. tm.assert_almost_equal(result, expected)
  301. # axis=1
  302. result = algos.take_nd(arr, indexer, axis=1)
  303. result2 = np.empty_like(result)
  304. algos.take_nd(arr, indexer, axis=1, out=result2)
  305. tm.assert_almost_equal(result, result2)
  306. expected = arr.take(indexer, axis=1)
  307. expected.view(np.int64)[:, [2, 4]] = iNaT
  308. tm.assert_almost_equal(result, expected)
  309. result = algos.take_nd(arr, indexer, axis=1,
  310. fill_value=datetime(2007, 1, 1))
  311. result2 = np.empty_like(result)
  312. algos.take_nd(arr, indexer, out=result2, axis=1,
  313. fill_value=datetime(2007, 1, 1))
  314. tm.assert_almost_equal(result, result2)
  315. expected = arr.take(indexer, axis=1)
  316. expected[:, [2, 4]] = datetime(2007, 1, 1)
  317. tm.assert_almost_equal(result, expected)
  318. def test_take_axis_0(self):
  319. arr = np.arange(12).reshape(4, 3)
  320. result = algos.take(arr, [0, -1])
  321. expected = np.array([[0, 1, 2], [9, 10, 11]])
  322. tm.assert_numpy_array_equal(result, expected)
  323. # allow_fill=True
  324. result = algos.take(arr, [0, -1], allow_fill=True, fill_value=0)
  325. expected = np.array([[0, 1, 2], [0, 0, 0]])
  326. tm.assert_numpy_array_equal(result, expected)
  327. def test_take_axis_1(self):
  328. arr = np.arange(12).reshape(4, 3)
  329. result = algos.take(arr, [0, -1], axis=1)
  330. expected = np.array([[0, 2], [3, 5], [6, 8], [9, 11]])
  331. tm.assert_numpy_array_equal(result, expected)
  332. # allow_fill=True
  333. result = algos.take(arr, [0, -1], axis=1, allow_fill=True,
  334. fill_value=0)
  335. expected = np.array([[0, 0], [3, 0], [6, 0], [9, 0]])
  336. tm.assert_numpy_array_equal(result, expected)
  337. class TestExtensionTake(object):
  338. # The take method found in pd.api.extensions
  339. def test_bounds_check_large(self):
  340. arr = np.array([1, 2])
  341. with pytest.raises(IndexError):
  342. algos.take(arr, [2, 3], allow_fill=True)
  343. with pytest.raises(IndexError):
  344. algos.take(arr, [2, 3], allow_fill=False)
  345. def test_bounds_check_small(self):
  346. arr = np.array([1, 2, 3], dtype=np.int64)
  347. indexer = [0, -1, -2]
  348. with pytest.raises(ValueError):
  349. algos.take(arr, indexer, allow_fill=True)
  350. result = algos.take(arr, indexer)
  351. expected = np.array([1, 3, 2], dtype=np.int64)
  352. tm.assert_numpy_array_equal(result, expected)
  353. @pytest.mark.parametrize('allow_fill', [True, False])
  354. def test_take_empty(self, allow_fill):
  355. arr = np.array([], dtype=np.int64)
  356. # empty take is ok
  357. result = algos.take(arr, [], allow_fill=allow_fill)
  358. tm.assert_numpy_array_equal(arr, result)
  359. with pytest.raises(IndexError):
  360. algos.take(arr, [0], allow_fill=allow_fill)
  361. def test_take_na_empty(self):
  362. result = algos.take(np.array([]), [-1, -1], allow_fill=True,
  363. fill_value=0.0)
  364. expected = np.array([0., 0.])
  365. tm.assert_numpy_array_equal(result, expected)
  366. def test_take_coerces_list(self):
  367. arr = [1, 2, 3]
  368. result = algos.take(arr, [0, 0])
  369. expected = np.array([1, 1])
  370. tm.assert_numpy_array_equal(result, expected)