PageRenderTime 65ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/tests/frame/test_replace.py

http://github.com/wesm/pandas
Python | 1115 lines | 883 code | 136 blank | 96 comment | 4 complexity | 8f02194ccba3d17b274dc628dda9a184 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from datetime import datetime
  4. import re
  5. import numpy as np
  6. import pytest
  7. from pandas.compat import StringIO, lrange, range, zip
  8. import pandas as pd
  9. from pandas import DataFrame, Index, Series, Timestamp, compat, date_range
  10. from pandas.tests.frame.common import TestData
  11. from pandas.util.testing import assert_frame_equal, assert_series_equal
  12. class TestDataFrameReplace(TestData):
  def test_replace_inplace(self):
      """In-place ``replace`` of NaN must agree with ``fillna``.

      Checks the scalar form and the list form of ``replace`` on the
      time-series fixture, and the non-inplace scalar form on the
      mixed-dtype fixture.
      """
      # Write NaNs through ``.iloc`` rather than the chained
      # ``frame['A'][:5] = ...`` form: chained assignment only reaches the
      # frame when the intermediate Series happens to be a view.
      col_a = self.tsframe.columns.get_loc('A')
      self.tsframe.iloc[:5, col_a] = np.nan
      self.tsframe.iloc[-5:, col_a] = np.nan

      tsframe = self.tsframe.copy()
      tsframe.replace(np.nan, 0, inplace=True)
      assert_frame_equal(tsframe, self.tsframe.fillna(0))

      # mixed type
      mf = self.mixed_frame
      mf.iloc[5:20, mf.columns.get_loc('foo')] = np.nan
      mf.iloc[-10:, mf.columns.get_loc('A')] = np.nan

      result = self.mixed_frame.replace(np.nan, 0)
      expected = self.mixed_frame.fillna(value=0)
      assert_frame_equal(result, expected)

      # list -> list form, in place
      tsframe = self.tsframe.copy()
      tsframe.replace([np.nan], [0], inplace=True)
      assert_frame_equal(tsframe, self.tsframe.fillna(0))
  29. def test_regex_replace_scalar(self):
  30. obj = {'a': list('ab..'), 'b': list('efgh')}
  31. dfobj = DataFrame(obj)
  32. mix = {'a': lrange(4), 'b': list('ab..')}
  33. dfmix = DataFrame(mix)
  34. # simplest cases
  35. # regex -> value
  36. # obj frame
  37. res = dfobj.replace(r'\s*\.\s*', np.nan, regex=True)
  38. assert_frame_equal(dfobj, res.fillna('.'))
  39. # mixed
  40. res = dfmix.replace(r'\s*\.\s*', np.nan, regex=True)
  41. assert_frame_equal(dfmix, res.fillna('.'))
  42. # regex -> regex
  43. # obj frame
  44. res = dfobj.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True)
  45. objc = obj.copy()
  46. objc['a'] = ['a', 'b', '...', '...']
  47. expec = DataFrame(objc)
  48. assert_frame_equal(res, expec)
  49. # with mixed
  50. res = dfmix.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True)
  51. mixc = mix.copy()
  52. mixc['b'] = ['a', 'b', '...', '...']
  53. expec = DataFrame(mixc)
  54. assert_frame_equal(res, expec)
  55. # everything with compiled regexs as well
  56. res = dfobj.replace(re.compile(r'\s*\.\s*'), np.nan, regex=True)
  57. assert_frame_equal(dfobj, res.fillna('.'))
  58. # mixed
  59. res = dfmix.replace(re.compile(r'\s*\.\s*'), np.nan, regex=True)
  60. assert_frame_equal(dfmix, res.fillna('.'))
  61. # regex -> regex
  62. # obj frame
  63. res = dfobj.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1')
  64. objc = obj.copy()
  65. objc['a'] = ['a', 'b', '...', '...']
  66. expec = DataFrame(objc)
  67. assert_frame_equal(res, expec)
  68. # with mixed
  69. res = dfmix.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1')
  70. mixc = mix.copy()
  71. mixc['b'] = ['a', 'b', '...', '...']
  72. expec = DataFrame(mixc)
  73. assert_frame_equal(res, expec)
  74. res = dfmix.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1')
  75. mixc = mix.copy()
  76. mixc['b'] = ['a', 'b', '...', '...']
  77. expec = DataFrame(mixc)
  78. assert_frame_equal(res, expec)
  79. res = dfmix.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1')
  80. mixc = mix.copy()
  81. mixc['b'] = ['a', 'b', '...', '...']
  82. expec = DataFrame(mixc)
  83. assert_frame_equal(res, expec)
  84. def test_regex_replace_scalar_inplace(self):
  85. obj = {'a': list('ab..'), 'b': list('efgh')}
  86. dfobj = DataFrame(obj)
  87. mix = {'a': lrange(4), 'b': list('ab..')}
  88. dfmix = DataFrame(mix)
  89. # simplest cases
  90. # regex -> value
  91. # obj frame
  92. res = dfobj.copy()
  93. res.replace(r'\s*\.\s*', np.nan, regex=True, inplace=True)
  94. assert_frame_equal(dfobj, res.fillna('.'))
  95. # mixed
  96. res = dfmix.copy()
  97. res.replace(r'\s*\.\s*', np.nan, regex=True, inplace=True)
  98. assert_frame_equal(dfmix, res.fillna('.'))
  99. # regex -> regex
  100. # obj frame
  101. res = dfobj.copy()
  102. res.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True, inplace=True)
  103. objc = obj.copy()
  104. objc['a'] = ['a', 'b', '...', '...']
  105. expec = DataFrame(objc)
  106. assert_frame_equal(res, expec)
  107. # with mixed
  108. res = dfmix.copy()
  109. res.replace(r'\s*(\.)\s*', r'\1\1\1', regex=True, inplace=True)
  110. mixc = mix.copy()
  111. mixc['b'] = ['a', 'b', '...', '...']
  112. expec = DataFrame(mixc)
  113. assert_frame_equal(res, expec)
  114. # everything with compiled regexs as well
  115. res = dfobj.copy()
  116. res.replace(re.compile(r'\s*\.\s*'), np.nan, regex=True, inplace=True)
  117. assert_frame_equal(dfobj, res.fillna('.'))
  118. # mixed
  119. res = dfmix.copy()
  120. res.replace(re.compile(r'\s*\.\s*'), np.nan, regex=True, inplace=True)
  121. assert_frame_equal(dfmix, res.fillna('.'))
  122. # regex -> regex
  123. # obj frame
  124. res = dfobj.copy()
  125. res.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1', regex=True,
  126. inplace=True)
  127. objc = obj.copy()
  128. objc['a'] = ['a', 'b', '...', '...']
  129. expec = DataFrame(objc)
  130. assert_frame_equal(res, expec)
  131. # with mixed
  132. res = dfmix.copy()
  133. res.replace(re.compile(r'\s*(\.)\s*'), r'\1\1\1', regex=True,
  134. inplace=True)
  135. mixc = mix.copy()
  136. mixc['b'] = ['a', 'b', '...', '...']
  137. expec = DataFrame(mixc)
  138. assert_frame_equal(res, expec)
  139. res = dfobj.copy()
  140. res.replace(regex=r'\s*\.\s*', value=np.nan, inplace=True)
  141. assert_frame_equal(dfobj, res.fillna('.'))
  142. # mixed
  143. res = dfmix.copy()
  144. res.replace(regex=r'\s*\.\s*', value=np.nan, inplace=True)
  145. assert_frame_equal(dfmix, res.fillna('.'))
  146. # regex -> regex
  147. # obj frame
  148. res = dfobj.copy()
  149. res.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1', inplace=True)
  150. objc = obj.copy()
  151. objc['a'] = ['a', 'b', '...', '...']
  152. expec = DataFrame(objc)
  153. assert_frame_equal(res, expec)
  154. # with mixed
  155. res = dfmix.copy()
  156. res.replace(regex=r'\s*(\.)\s*', value=r'\1\1\1', inplace=True)
  157. mixc = mix.copy()
  158. mixc['b'] = ['a', 'b', '...', '...']
  159. expec = DataFrame(mixc)
  160. assert_frame_equal(res, expec)
  161. # everything with compiled regexs as well
  162. res = dfobj.copy()
  163. res.replace(regex=re.compile(r'\s*\.\s*'), value=np.nan, inplace=True)
  164. assert_frame_equal(dfobj, res.fillna('.'))
  165. # mixed
  166. res = dfmix.copy()
  167. res.replace(regex=re.compile(r'\s*\.\s*'), value=np.nan, inplace=True)
  168. assert_frame_equal(dfmix, res.fillna('.'))
  169. # regex -> regex
  170. # obj frame
  171. res = dfobj.copy()
  172. res.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1',
  173. inplace=True)
  174. objc = obj.copy()
  175. objc['a'] = ['a', 'b', '...', '...']
  176. expec = DataFrame(objc)
  177. assert_frame_equal(res, expec)
  178. # with mixed
  179. res = dfmix.copy()
  180. res.replace(regex=re.compile(r'\s*(\.)\s*'), value=r'\1\1\1',
  181. inplace=True)
  182. mixc = mix.copy()
  183. mixc['b'] = ['a', 'b', '...', '...']
  184. expec = DataFrame(mixc)
  185. assert_frame_equal(res, expec)
  186. def test_regex_replace_list_obj(self):
  187. obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
  188. dfobj = DataFrame(obj)
  189. # lists of regexes and values
  190. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  191. to_replace_res = [r'\s*\.\s*', r'e|f|g']
  192. values = [np.nan, 'crap']
  193. res = dfobj.replace(to_replace_res, values, regex=True)
  194. expec = DataFrame({'a': ['a', 'b', np.nan, np.nan], 'b': ['crap'] * 3 +
  195. ['h'], 'c': ['h', 'crap', 'l', 'o']})
  196. assert_frame_equal(res, expec)
  197. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  198. to_replace_res = [r'\s*(\.)\s*', r'(e|f|g)']
  199. values = [r'\1\1', r'\1_crap']
  200. res = dfobj.replace(to_replace_res, values, regex=True)
  201. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e_crap',
  202. 'f_crap',
  203. 'g_crap', 'h'],
  204. 'c': ['h', 'e_crap', 'l', 'o']})
  205. assert_frame_equal(res, expec)
  206. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  207. # or vN)]
  208. to_replace_res = [r'\s*(\.)\s*', r'e']
  209. values = [r'\1\1', r'crap']
  210. res = dfobj.replace(to_replace_res, values, regex=True)
  211. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g',
  212. 'h'],
  213. 'c': ['h', 'crap', 'l', 'o']})
  214. assert_frame_equal(res, expec)
  215. to_replace_res = [r'\s*(\.)\s*', r'e']
  216. values = [r'\1\1', r'crap']
  217. res = dfobj.replace(value=values, regex=to_replace_res)
  218. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g',
  219. 'h'],
  220. 'c': ['h', 'crap', 'l', 'o']})
  221. assert_frame_equal(res, expec)
  222. def test_regex_replace_list_obj_inplace(self):
  223. # same as above with inplace=True
  224. # lists of regexes and values
  225. obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
  226. dfobj = DataFrame(obj)
  227. # lists of regexes and values
  228. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  229. to_replace_res = [r'\s*\.\s*', r'e|f|g']
  230. values = [np.nan, 'crap']
  231. res = dfobj.copy()
  232. res.replace(to_replace_res, values, inplace=True, regex=True)
  233. expec = DataFrame({'a': ['a', 'b', np.nan, np.nan], 'b': ['crap'] * 3 +
  234. ['h'], 'c': ['h', 'crap', 'l', 'o']})
  235. assert_frame_equal(res, expec)
  236. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  237. to_replace_res = [r'\s*(\.)\s*', r'(e|f|g)']
  238. values = [r'\1\1', r'\1_crap']
  239. res = dfobj.copy()
  240. res.replace(to_replace_res, values, inplace=True, regex=True)
  241. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e_crap',
  242. 'f_crap',
  243. 'g_crap', 'h'],
  244. 'c': ['h', 'e_crap', 'l', 'o']})
  245. assert_frame_equal(res, expec)
  246. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  247. # or vN)]
  248. to_replace_res = [r'\s*(\.)\s*', r'e']
  249. values = [r'\1\1', r'crap']
  250. res = dfobj.copy()
  251. res.replace(to_replace_res, values, inplace=True, regex=True)
  252. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g',
  253. 'h'],
  254. 'c': ['h', 'crap', 'l', 'o']})
  255. assert_frame_equal(res, expec)
  256. to_replace_res = [r'\s*(\.)\s*', r'e']
  257. values = [r'\1\1', r'crap']
  258. res = dfobj.copy()
  259. res.replace(value=values, regex=to_replace_res, inplace=True)
  260. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['crap', 'f', 'g',
  261. 'h'],
  262. 'c': ['h', 'crap', 'l', 'o']})
  263. assert_frame_equal(res, expec)
  264. def test_regex_replace_list_mixed(self):
  265. # mixed frame to make sure this doesn't break things
  266. mix = {'a': lrange(4), 'b': list('ab..')}
  267. dfmix = DataFrame(mix)
  268. # lists of regexes and values
  269. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  270. to_replace_res = [r'\s*\.\s*', r'a']
  271. values = [np.nan, 'crap']
  272. mix2 = {'a': lrange(4), 'b': list('ab..'), 'c': list('halo')}
  273. dfmix2 = DataFrame(mix2)
  274. res = dfmix2.replace(to_replace_res, values, regex=True)
  275. expec = DataFrame({'a': mix2['a'], 'b': ['crap', 'b', np.nan, np.nan],
  276. 'c': ['h', 'crap', 'l', 'o']})
  277. assert_frame_equal(res, expec)
  278. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  279. to_replace_res = [r'\s*(\.)\s*', r'(a|b)']
  280. values = [r'\1\1', r'\1_crap']
  281. res = dfmix.replace(to_replace_res, values, regex=True)
  282. expec = DataFrame({'a': mix['a'], 'b': ['a_crap', 'b_crap', '..',
  283. '..']})
  284. assert_frame_equal(res, expec)
  285. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  286. # or vN)]
  287. to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)']
  288. values = [r'\1\1', r'crap', r'\1_crap']
  289. res = dfmix.replace(to_replace_res, values, regex=True)
  290. expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']})
  291. assert_frame_equal(res, expec)
  292. to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)']
  293. values = [r'\1\1', r'crap', r'\1_crap']
  294. res = dfmix.replace(regex=to_replace_res, value=values)
  295. expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']})
  296. assert_frame_equal(res, expec)
  297. def test_regex_replace_list_mixed_inplace(self):
  298. mix = {'a': lrange(4), 'b': list('ab..')}
  299. dfmix = DataFrame(mix)
  300. # the same inplace
  301. # lists of regexes and values
  302. # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN]
  303. to_replace_res = [r'\s*\.\s*', r'a']
  304. values = [np.nan, 'crap']
  305. res = dfmix.copy()
  306. res.replace(to_replace_res, values, inplace=True, regex=True)
  307. expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b', np.nan, np.nan]})
  308. assert_frame_equal(res, expec)
  309. # list of [re1, re2, ..., reN] -> [re1, re2, .., reN]
  310. to_replace_res = [r'\s*(\.)\s*', r'(a|b)']
  311. values = [r'\1\1', r'\1_crap']
  312. res = dfmix.copy()
  313. res.replace(to_replace_res, values, inplace=True, regex=True)
  314. expec = DataFrame({'a': mix['a'], 'b': ['a_crap', 'b_crap', '..',
  315. '..']})
  316. assert_frame_equal(res, expec)
  317. # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN
  318. # or vN)]
  319. to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)']
  320. values = [r'\1\1', r'crap', r'\1_crap']
  321. res = dfmix.copy()
  322. res.replace(to_replace_res, values, inplace=True, regex=True)
  323. expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']})
  324. assert_frame_equal(res, expec)
  325. to_replace_res = [r'\s*(\.)\s*', r'a', r'(b)']
  326. values = [r'\1\1', r'crap', r'\1_crap']
  327. res = dfmix.copy()
  328. res.replace(regex=to_replace_res, value=values, inplace=True)
  329. expec = DataFrame({'a': mix['a'], 'b': ['crap', 'b_crap', '..', '..']})
  330. assert_frame_equal(res, expec)
  331. def test_regex_replace_dict_mixed(self):
  332. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  333. dfmix = DataFrame(mix)
  334. # dicts
  335. # single dict {re1: v1}, search the whole frame
  336. # need test for this...
  337. # list of dicts {re1: v1, re2: v2, ..., re3: v3}, search the whole
  338. # frame
  339. res = dfmix.replace({'b': r'\s*\.\s*'}, {'b': np.nan}, regex=True)
  340. res2 = dfmix.copy()
  341. res2.replace({'b': r'\s*\.\s*'}, {'b': np.nan},
  342. inplace=True, regex=True)
  343. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', np.nan, np.nan], 'c':
  344. mix['c']})
  345. assert_frame_equal(res, expec)
  346. assert_frame_equal(res2, expec)
  347. # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the
  348. # whole frame
  349. res = dfmix.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, regex=True)
  350. res2 = dfmix.copy()
  351. res2.replace({'b': r'\s*(\.)\s*'}, {'b': r'\1ty'}, inplace=True,
  352. regex=True)
  353. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
  354. mix['c']})
  355. assert_frame_equal(res, expec)
  356. assert_frame_equal(res2, expec)
  357. res = dfmix.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'})
  358. res2 = dfmix.copy()
  359. res2.replace(regex={'b': r'\s*(\.)\s*'}, value={'b': r'\1ty'},
  360. inplace=True)
  361. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', '.ty', '.ty'], 'c':
  362. mix['c']})
  363. assert_frame_equal(res, expec)
  364. assert_frame_equal(res2, expec)
  365. # scalar -> dict
  366. # to_replace regex, {value: value}
  367. expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
  368. mix['c']})
  369. res = dfmix.replace('a', {'b': np.nan}, regex=True)
  370. res2 = dfmix.copy()
  371. res2.replace('a', {'b': np.nan}, regex=True, inplace=True)
  372. assert_frame_equal(res, expec)
  373. assert_frame_equal(res2, expec)
  374. res = dfmix.replace('a', {'b': np.nan}, regex=True)
  375. res2 = dfmix.copy()
  376. res2.replace(regex='a', value={'b': np.nan}, inplace=True)
  377. expec = DataFrame({'a': mix['a'], 'b': [np.nan, 'b', '.', '.'], 'c':
  378. mix['c']})
  379. assert_frame_equal(res, expec)
  380. assert_frame_equal(res2, expec)
  381. def test_regex_replace_dict_nested(self):
  382. # nested dicts will not work until this is implemented for Series
  383. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  384. dfmix = DataFrame(mix)
  385. res = dfmix.replace({'b': {r'\s*\.\s*': np.nan}}, regex=True)
  386. res2 = dfmix.copy()
  387. res4 = dfmix.copy()
  388. res2.replace({'b': {r'\s*\.\s*': np.nan}}, inplace=True, regex=True)
  389. res3 = dfmix.replace(regex={'b': {r'\s*\.\s*': np.nan}})
  390. res4.replace(regex={'b': {r'\s*\.\s*': np.nan}}, inplace=True)
  391. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', np.nan, np.nan], 'c':
  392. mix['c']})
  393. assert_frame_equal(res, expec)
  394. assert_frame_equal(res2, expec)
  395. assert_frame_equal(res3, expec)
  396. assert_frame_equal(res4, expec)
  397. def test_regex_replace_dict_nested_non_first_character(self):
  398. # GH 25259
  399. df = pd.DataFrame({'first': ['abc', 'bca', 'cab']})
  400. expected = pd.DataFrame({'first': ['.bc', 'bc.', 'c.b']})
  401. result = df.replace({'a': '.'}, regex=True)
  402. assert_frame_equal(result, expected)
  403. def test_regex_replace_dict_nested_gh4115(self):
  404. df = pd.DataFrame({'Type': ['Q', 'T', 'Q', 'Q', 'T'], 'tmp': 2})
  405. expected = DataFrame({'Type': [0, 1, 0, 0, 1], 'tmp': 2})
  406. result = df.replace({'Type': {'Q': 0, 'T': 1}})
  407. assert_frame_equal(result, expected)
  408. def test_regex_replace_list_to_scalar(self):
  409. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  410. df = DataFrame(mix)
  411. expec = DataFrame({'a': mix['a'], 'b': np.array([np.nan] * 4),
  412. 'c': [np.nan, np.nan, np.nan, 'd']})
  413. res = df.replace([r'\s*\.\s*', 'a|b'], np.nan, regex=True)
  414. res2 = df.copy()
  415. res3 = df.copy()
  416. res2.replace([r'\s*\.\s*', 'a|b'], np.nan, regex=True, inplace=True)
  417. res3.replace(regex=[r'\s*\.\s*', 'a|b'], value=np.nan, inplace=True)
  418. assert_frame_equal(res, expec)
  419. assert_frame_equal(res2, expec)
  420. assert_frame_equal(res3, expec)
  421. def test_regex_replace_str_to_numeric(self):
  422. # what happens when you try to replace a numeric value with a regex?
  423. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  424. df = DataFrame(mix)
  425. res = df.replace(r'\s*\.\s*', 0, regex=True)
  426. res2 = df.copy()
  427. res2.replace(r'\s*\.\s*', 0, inplace=True, regex=True)
  428. res3 = df.copy()
  429. res3.replace(regex=r'\s*\.\s*', value=0, inplace=True)
  430. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', 0, 0], 'c':
  431. mix['c']})
  432. assert_frame_equal(res, expec)
  433. assert_frame_equal(res2, expec)
  434. assert_frame_equal(res3, expec)
  435. def test_regex_replace_regex_list_to_numeric(self):
  436. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  437. df = DataFrame(mix)
  438. res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True)
  439. res2 = df.copy()
  440. res2.replace([r'\s*\.\s*', 'b'], 0, regex=True, inplace=True)
  441. res3 = df.copy()
  442. res3.replace(regex=[r'\s*\.\s*', 'b'], value=0, inplace=True)
  443. expec = DataFrame({'a': mix['a'], 'b': ['a', 0, 0, 0], 'c': ['a', 0,
  444. np.nan,
  445. 'd']})
  446. assert_frame_equal(res, expec)
  447. assert_frame_equal(res2, expec)
  448. assert_frame_equal(res3, expec)
  449. def test_regex_replace_series_of_regexes(self):
  450. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  451. df = DataFrame(mix)
  452. s1 = Series({'b': r'\s*\.\s*'})
  453. s2 = Series({'b': np.nan})
  454. res = df.replace(s1, s2, regex=True)
  455. res2 = df.copy()
  456. res2.replace(s1, s2, inplace=True, regex=True)
  457. res3 = df.copy()
  458. res3.replace(regex=s1, value=s2, inplace=True)
  459. expec = DataFrame({'a': mix['a'], 'b': ['a', 'b', np.nan, np.nan], 'c':
  460. mix['c']})
  461. assert_frame_equal(res, expec)
  462. assert_frame_equal(res2, expec)
  463. assert_frame_equal(res3, expec)
  464. def test_regex_replace_numeric_to_object_conversion(self):
  465. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  466. df = DataFrame(mix)
  467. expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']})
  468. res = df.replace(0, 'a')
  469. assert_frame_equal(res, expec)
  470. assert res.a.dtype == np.object_
  @pytest.mark.parametrize('metachar', ['[]', '()', r'\d', r'\w', r'\s'])
  def test_replace_regex_metachar(self, metachar):
      """Regex metacharacters are matched literally without ``regex=True``."""
      frame = DataFrame({'a': [metachar, 'else']})
      outcome = frame.replace({'a': {metachar: 'paren'}})
      wanted = DataFrame({'a': ['paren', 'else']})
      assert_frame_equal(outcome, wanted)
  def test_replace(self):
      """Scalar ``replace`` round-trips NaN and handles edge-case frames.

      Covers NaN <-> sentinel replacement on the time-series fixture, an
      empty frame, and a frame mixing strings with datetimes (GH 11698).
      """
      # Assign through ``.iloc`` rather than the chained
      # ``frame['A'][:5] = ...`` form: chained assignment only reaches the
      # frame when the intermediate Series happens to be a view.
      col_a = self.tsframe.columns.get_loc('A')
      col_b = self.tsframe.columns.get_loc('B')
      self.tsframe.iloc[:5, col_a] = np.nan
      self.tsframe.iloc[-5:, col_a] = np.nan

      zero_filled = self.tsframe.replace(np.nan, -1e8)
      assert_frame_equal(zero_filled, self.tsframe.fillna(-1e8))
      assert_frame_equal(zero_filled.replace(-1e8, np.nan), self.tsframe)

      self.tsframe.iloc[:5, col_a] = np.nan
      self.tsframe.iloc[-5:, col_a] = np.nan
      self.tsframe.iloc[:5, col_b] = -1e8

      # empty
      df = DataFrame(index=['a', 'b'])
      assert_frame_equal(df, df.replace(5, 7))

      # GH 11698
      # test for mixed data types.
      df = pd.DataFrame([('-', pd.to_datetime('20150101')),
                         ('a', pd.to_datetime('20150102'))])
      df1 = df.replace('-', np.nan)
      expected_df = pd.DataFrame([(np.nan, pd.to_datetime('20150101')),
                                  ('a', pd.to_datetime('20150102'))])
      assert_frame_equal(df1, expected_df)
  497. def test_replace_list(self):
  498. obj = {'a': list('ab..'), 'b': list('efgh'), 'c': list('helo')}
  499. dfobj = DataFrame(obj)
  500. # lists of regexes and values
  501. # list of [v1, v2, ..., vN] -> [v1, v2, ..., vN]
  502. to_replace_res = [r'.', r'e']
  503. values = [np.nan, 'crap']
  504. res = dfobj.replace(to_replace_res, values)
  505. expec = DataFrame({'a': ['a', 'b', np.nan, np.nan],
  506. 'b': ['crap', 'f', 'g', 'h'], 'c': ['h', 'crap',
  507. 'l', 'o']})
  508. assert_frame_equal(res, expec)
  509. # list of [v1, v2, ..., vN] -> [v1, v2, .., vN]
  510. to_replace_res = [r'.', r'f']
  511. values = [r'..', r'crap']
  512. res = dfobj.replace(to_replace_res, values)
  513. expec = DataFrame({'a': ['a', 'b', '..', '..'], 'b': ['e', 'crap', 'g',
  514. 'h'],
  515. 'c': ['h', 'e', 'l', 'o']})
  516. assert_frame_equal(res, expec)
  def test_replace_with_empty_list(self):
      """Empty-list ``to_replace`` is a no-op; list values in a dict raise."""
      # GH 21977
      column = pd.Series([['a', 'b'], [], np.nan, [1]])
      frame = pd.DataFrame({'col': column})
      expected = frame
      outcome = frame.replace([], np.nan)
      assert_frame_equal(outcome, expected)

      # GH 19266
      for bad_value in ([], ['dummy', 'alt']):
          with pytest.raises(ValueError, match="cannot assign mismatch"):
              frame.replace({np.nan: bad_value})
  529. def test_replace_series_dict(self):
  530. # from GH 3064
  531. df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
  532. result = df.replace(0, {'zero': 0.5, 'one': 1.0})
  533. expected = DataFrame(
  534. {'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 2.0, 'b': 1.0}})
  535. assert_frame_equal(result, expected)
  536. result = df.replace(0, df.mean())
  537. assert_frame_equal(result, expected)
  538. # series to series/dict
  539. df = DataFrame({'zero': {'a': 0.0, 'b': 1}, 'one': {'a': 2.0, 'b': 0}})
  540. s = Series({'zero': 0.0, 'one': 2.0})
  541. result = df.replace(s, {'zero': 0.5, 'one': 1.0})
  542. expected = DataFrame(
  543. {'zero': {'a': 0.5, 'b': 1}, 'one': {'a': 1.0, 'b': 0.0}})
  544. assert_frame_equal(result, expected)
  545. result = df.replace(s, df.mean())
  546. assert_frame_equal(result, expected)
  547. def test_replace_convert(self):
  548. # gh 3907
  549. df = DataFrame([['foo', 'bar', 'bah'], ['bar', 'foo', 'bah']])
  550. m = {'foo': 1, 'bar': 2, 'bah': 3}
  551. rep = df.replace(m)
  552. expec = Series([np.int64] * 3)
  553. res = rep.dtypes
  554. assert_series_equal(expec, res)
  def test_replace_mixed(self):
      """``replace`` on mixed-dtype frames, including dtype upcasting.

      Covers NaN round-trips on the mixed fixture, int -> float
      upcasting, upcasting of one of two int columns, upcasting to
      object, and per-column replacement values from ``df.mean()``.
      """
      mf = self.mixed_frame
      mf.iloc[5:20, mf.columns.get_loc('foo')] = np.nan
      mf.iloc[-10:, mf.columns.get_loc('A')] = np.nan

      # NaN -> sentinel and back, for an int and a float sentinel
      for sentinel in (-18, -1e8):
          result = self.mixed_frame.replace(np.nan, sentinel)
          expected = self.mixed_frame.fillna(value=sentinel)
          assert_frame_equal(result, expected)
          assert_frame_equal(result.replace(sentinel, np.nan),
                             self.mixed_frame)

      floats = Series([1.0, 2.0], dtype='float64')
      ints = Series([0, 1], dtype='int64')

      # int block upcasting
      df = DataFrame({'A': floats, 'B': ints})
      expected = DataFrame({'A': floats,
                            'B': Series([0.5, 1], dtype='float64')})
      result = df.replace(0, 0.5)
      assert_frame_equal(result, expected)

      df.replace(0, 0.5, inplace=True)
      assert_frame_equal(df, expected)

      # int block splitting
      other_ints = Series([1, 2], dtype='int64')
      df = DataFrame({'A': floats, 'B': ints, 'C': other_ints})
      expected = DataFrame({'A': floats,
                            'B': Series([0.5, 1], dtype='float64'),
                            'C': other_ints})
      result = df.replace(0, 0.5)
      assert_frame_equal(result, expected)

      # to object block upcasting
      df = DataFrame({'A': floats, 'B': ints})
      expected = DataFrame({'A': Series([1, 'foo'], dtype='object'),
                            'B': ints})
      result = df.replace(2, 'foo')
      assert_frame_equal(result, expected)

      expected = DataFrame({'A': Series(['foo', 'bar'], dtype='object'),
                            'B': Series([0, 'foo'], dtype='object')})
      result = df.replace([1, 2], ['foo', 'bar'])
      assert_frame_equal(result, expected)

      # per-column replacement values taken from the column means
      # (the original comment's source reference is missing)
      df = DataFrame({'A': Series([3, 0], dtype='int64'),
                      'B': Series([0, 3], dtype='int64')})
      result = df.replace(3, df.mean().to_dict())
      expected = df.copy().astype('float64')
      m = df.mean()
      expected.iloc[0, 0] = m[0]
      expected.iloc[1, 1] = m[1]
      assert_frame_equal(result, expected)
  605. def test_replace_simple_nested_dict(self):
  606. df = DataFrame({'col': range(1, 5)})
  607. expected = DataFrame({'col': ['a', 2, 3, 'b']})
  608. result = df.replace({'col': {1: 'a', 4: 'b'}})
  609. assert_frame_equal(expected, result)
  610. # in this case, should be the same as the not nested version
  611. result = df.replace({1: 'a', 4: 'b'})
  612. assert_frame_equal(expected, result)
  613. def test_replace_simple_nested_dict_with_nonexistent_value(self):
  614. df = DataFrame({'col': range(1, 5)})
  615. expected = DataFrame({'col': ['a', 2, 3, 'b']})
  616. result = df.replace({-1: '-', 1: 'a', 4: 'b'})
  617. assert_frame_equal(expected, result)
  618. result = df.replace({'col': {-1: '-', 1: 'a', 4: 'b'}})
  619. assert_frame_equal(expected, result)
  def test_replace_value_is_none(self):
      """Dict ``to_replace`` with no ``value`` argument.

      Two cells of the time-series fixture are overwritten for the test
      and put back afterwards.
      """
      saved_first = self.tsframe.iloc[0, 0]
      saved_second = self.tsframe.iloc[1, 0]
      self.tsframe.iloc[0, 0] = np.nan
      self.tsframe.iloc[1, 0] = 1

      # The result must match replacing on the transpose and transposing
      # back.
      result = self.tsframe.replace(to_replace={np.nan: 0})
      expected = self.tsframe.T.replace(to_replace={np.nan: 0}).T
      assert_frame_equal(result, expected)

      # Two keys at once: NaN -> 0 and 1 -> -1e8.
      result = self.tsframe.replace(to_replace={np.nan: 0, 1: -1e8})
      expected = self.tsframe.copy()
      expected.iloc[0, 0] = 0
      expected.iloc[1, 0] = -1e8
      assert_frame_equal(expected, result)

      # Put the fixture cells back.
      self.tsframe.iloc[0, 0] = saved_first
      self.tsframe.iloc[1, 0] = saved_second
  def test_replace_for_new_dtypes(self):
      """NaN <-> sentinel ``replace`` round-trip on a float32 frame."""
      # dtypes
      tsframe = self.tsframe.copy().astype(np.float32)

      # Assign through ``.iloc`` rather than the chained
      # ``frame['A'][:5] = ...`` form: chained assignment only reaches the
      # frame when the intermediate Series happens to be a view.
      col_a = tsframe.columns.get_loc('A')
      col_b = tsframe.columns.get_loc('B')
      tsframe.iloc[:5, col_a] = np.nan
      tsframe.iloc[-5:, col_a] = np.nan

      zero_filled = tsframe.replace(np.nan, -1e8)
      assert_frame_equal(zero_filled, tsframe.fillna(-1e8))
      assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe)

      tsframe.iloc[:5, col_a] = np.nan
      tsframe.iloc[-5:, col_a] = np.nan
      tsframe.iloc[:5, col_b] = -1e8

      # Work on an explicit copy of the column so the masked assignment
      # does not depend on view semantics, then write it back.
      b = tsframe['B'].copy()
      b[b == -1e8] = np.nan
      tsframe['B'] = b

      # NOTE(review): both sides of this comparison are the same
      # expression, so it only checks that the call runs and is
      # deterministic.
      result = tsframe.fillna(method='bfill')
      assert_frame_equal(result, tsframe.fillna(method='bfill'))
  @pytest.mark.parametrize('frame, to_replace, value, expected', [
      (DataFrame({'ints': [1, 2, 3]}), 1, 0,
       DataFrame({'ints': [0, 2, 3]})),
      (DataFrame({'ints': [1, 2, 3]}, dtype=np.int32), 1, 0,
       DataFrame({'ints': [0, 2, 3]}, dtype=np.int32)),
      (DataFrame({'ints': [1, 2, 3]}, dtype=np.int16), 1, 0,
       DataFrame({'ints': [0, 2, 3]}, dtype=np.int16)),
      (DataFrame({'bools': [True, False, True]}), False, True,
       DataFrame({'bools': [True, True, True]})),
      (DataFrame({'complex': [1j, 2j, 3j]}), 1j, 0,
       DataFrame({'complex': [0j, 2j, 3j]})),
      (DataFrame({'datetime64': Index([datetime(2018, 5, 28),
                                       datetime(2018, 7, 28),
                                       datetime(2018, 5, 28)])}),
       datetime(2018, 5, 28), datetime(2018, 7, 28),
       DataFrame({'datetime64': Index([datetime(2018, 7, 28)] * 3)})),
      # GH 20380
      (DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['foo']}),
       'foo', 'bar',
       DataFrame({'dt': [datetime(3017, 12, 20)], 'str': ['bar']})),
      (DataFrame({'A': date_range('20130101', periods=3, tz='US/Eastern'),
                  'B': [0, np.nan, 2]}),
       Timestamp('20130102', tz='US/Eastern'),
       Timestamp('20130104', tz='US/Eastern'),
       DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
                        Timestamp('20130104', tz='US/Eastern'),
                        Timestamp('20130103', tz='US/Eastern')],
                  'B': [0, np.nan, 2]}))
  ])
  def test_replace_dtypes(self, frame, to_replace, value, expected):
      """Scalar ``replace`` must give the expected frame for each dtype case."""
      outcome = frame.replace(to_replace, value)
      assert_frame_equal(outcome, expected)
  def test_replace_input_formats_listlike(self):
      """Dict/dict, scalar/dict and list/list argument combinations.

      Frame-level results are compared with the same replacement done
      column by column or one pair at a time; mismatched list lengths
      must raise ``ValueError``.
      """
      # both dicts
      to_rep = {'A': np.nan, 'B': 0, 'C': ''}
      values = {'A': 0, 'B': -1, 'C': 'missing'}
      df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                      'C': ['', 'asdf', 'fd']})
      filled = df.replace(to_rep, values)
      per_column = {name: column.replace(to_rep[name], values[name])
                    for name, column in compat.iteritems(df)}
      assert_frame_equal(filled, DataFrame(per_column))

      result = df.replace([0, 2, 5], [5, 2, 0])
      expected = DataFrame({'A': [np.nan, 5, np.inf], 'B': [5, 2, 0],
                            'C': ['', 'asdf', 'fd']})
      assert_frame_equal(result, expected)

      # scalar to dict
      values = {'A': 0, 'B': -1, 'C': 'missing'}
      df = DataFrame({'A': [np.nan, 0, np.nan], 'B': [0, 2, 5],
                      'C': ['', 'asdf', 'fd']})
      filled = df.replace(np.nan, values)
      per_column = {name: column.replace(np.nan, values[name])
                    for name, column in compat.iteritems(df)}
      assert_frame_equal(filled, DataFrame(per_column))

      # list to list
      to_rep = [np.nan, 0, '']
      values = [-2, -1, 'missing']
      result = df.replace(to_rep, values)
      expected = df.copy()
      # Apply the pairs one at a time, in list order.
      for old, new in zip(to_rep, values):
          expected.replace(old, new, inplace=True)
      assert_frame_equal(result, expected)

      msg = r"Replacement lists must match in length\. Expecting 3 got 2"
      with pytest.raises(ValueError, match=msg):
          df.replace(to_rep, values[1:])
  def test_replace_input_formats_scalar(self):
      """Check ``DataFrame.replace`` when the replacement is a scalar.

      Exercises dict -> scalar and list -> scalar replacement, and
      verifies that a dict ``to_replace`` combined with a list ``value``
      raises ``TypeError``.
      """
      df = DataFrame({'A': [np.nan, 0, np.inf], 'B': [0, 2, 5],
                      'C': ['', 'asdf', 'fd']})

      # dict to scalar
      to_rep = {'A': np.nan, 'B': 0, 'C': ''}
      filled = df.replace(to_rep, 0)
      # reference result: replace each column's own target with 0
      expected = {k: v.replace(to_rep[k], 0)
                  for k, v in compat.iteritems(df)}
      assert_frame_equal(filled, DataFrame(expected))

      # a list is not an accepted ``value`` for a dict ``to_replace``
      msg = "value argument must be scalar, dict, or Series"
      with pytest.raises(TypeError, match=msg):
          df.replace(to_rep, [np.nan, 0, ''])

      # list to scalar
      to_rep = [np.nan, 0, '']
      result = df.replace(to_rep, -1)
      # reference result: replace the targets one at a time, in list order
      expected = df.copy()
      for target in to_rep:
          expected.replace(target, -1, inplace=True)
      assert_frame_equal(result, expected)
  736. def test_replace_limit(self):
  737. pass
  738. def test_replace_dict_no_regex(self):
  739. answer = Series({0: 'Strongly Agree', 1: 'Agree', 2: 'Neutral', 3:
  740. 'Disagree', 4: 'Strongly Disagree'})
  741. weights = {'Agree': 4, 'Disagree': 2, 'Neutral': 3, 'Strongly Agree':
  742. 5, 'Strongly Disagree': 1}
  743. expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
  744. result = answer.replace(weights)
  745. assert_series_equal(result, expected)
  746. def test_replace_series_no_regex(self):
  747. answer = Series({0: 'Strongly Agree', 1: 'Agree', 2: 'Neutral', 3:
  748. 'Disagree', 4: 'Strongly Disagree'})
  749. weights = Series({'Agree': 4, 'Disagree': 2, 'Neutral': 3,
  750. 'Strongly Agree': 5, 'Strongly Disagree': 1})
  751. expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
  752. result = answer.replace(weights)
  753. assert_series_equal(result, expected)
  754. def test_replace_dict_tuple_list_ordering_remains_the_same(self):
  755. df = DataFrame(dict(A=[np.nan, 1]))
  756. res1 = df.replace(to_replace={np.nan: 0, 1: -1e8})
  757. res2 = df.replace(to_replace=(1, np.nan), value=[-1e8, 0])
  758. res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0])
  759. expected = DataFrame({'A': [0, -1e8]})
  760. assert_frame_equal(res1, res2)
  761. assert_frame_equal(res2, res3)
  762. assert_frame_equal(res3, expected)
  763. def test_replace_doesnt_replace_without_regex(self):
  764. raw = """fol T_opp T_Dir T_Enh
  765. 0 1 0 0 vo
  766. 1 2 vr 0 0
  767. 2 2 0 0 0
  768. 3 3 0 bt 0"""
  769. df = pd.read_csv(StringIO(raw), sep=r'\s+')
  770. res = df.replace({r'\D': 1})
  771. assert_frame_equal(df, res)
  772. def test_replace_bool_with_string(self):
  773. df = DataFrame({'a': [True, False], 'b': list('ab')})
  774. result = df.replace(True, 'a')
  775. expected = DataFrame({'a': ['a', False], 'b': df.b})
  776. assert_frame_equal(result, expected)
  777. def test_replace_pure_bool_with_string_no_op(self):
  778. df = DataFrame(np.random.rand(2, 2) > 0.5)
  779. result = df.replace('asdf', 'fdsa')
  780. assert_frame_equal(df, result)
  781. def test_replace_bool_with_bool(self):
  782. df = DataFrame(np.random.rand(2, 2) > 0.5)
  783. result = df.replace(False, True)
  784. expected = DataFrame(np.ones((2, 2), dtype=bool))
  785. assert_frame_equal(result, expected)
  def test_replace_with_dict_with_bool_keys(self):
      """A mapping mixing str and bool keys raises ``TypeError``."""
      frame = DataFrame({0: [True, False], 1: [False, True]})
      mixed_keys = {'asdf': 'asdb', True: 'yes'}
      with pytest.raises(TypeError, match='Cannot compare types .+'):
          frame.replace(mixed_keys)
  790. def test_replace_truthy(self):
  791. df = DataFrame({'a': [True, True]})
  792. r = df.replace([np.inf, -np.inf], np.nan)
  793. e = df
  794. assert_frame_equal(r, e)
  def test_replace_int_to_int_chain(self):
      """An int -> int mapping whose values overlap its keys raises."""
      frame = DataFrame({'a': list(range(1, 5))})
      # 1 -> 2, 2 -> 3, 3 -> 4, 4 -> 5
      shift_by_one = {old: old + 1 for old in range(1, 5)}
      with pytest.raises(ValueError, match="Replacement not allowed .+"):
          frame.replace({'a': shift_by_one})
  def test_replace_str_to_str_chain(self):
      """A str -> str mapping whose values overlap its keys raises."""
      old = np.arange(1, 5).astype(str)
      new = np.arange(2, 6).astype(str)
      frame = DataFrame({'a': old})
      # '1' -> '2', '2' -> '3', '3' -> '4', '4' -> '5'
      chained = dict(zip(old, new))
      with pytest.raises(ValueError, match="Replacement not allowed .+"):
          frame.replace({'a': chained})
  806. def test_replace_swapping_bug(self):
  807. df = pd.DataFrame({'a': [True, False, True]})
  808. res = df.replace({'a': {True: 'Y', False: 'N'}})
  809. expect = pd.DataFrame({'a': ['Y', 'N', 'Y']})
  810. assert_frame_equal(res, expect)
  811. df = pd.DataFrame({'a': [0, 1, 0]})
  812. res = df.replace({'a': {0: 'Y', 1: 'N'}})
  813. expect = pd.DataFrame({'a': ['Y', 'N', 'Y']})
  814. assert_frame_equal(res, expect)
  815. def test_replace_period(self):
  816. d = {
  817. 'fname': {
  818. 'out_augmented_AUG_2011.json':
  819. pd.Period(year=2011, month=8, freq='M'),
  820. 'out_augmented_JAN_2011.json':
  821. pd.Period(year=2011, month=1, freq='M'),
  822. 'out_augmented_MAY_2012.json':
  823. pd.Period(year=2012, month=5, freq='M'),
  824. 'out_augmented_SUBSIDY_WEEK.json':
  825. pd.Period(year=2011, month=4, freq='M'),
  826. 'out_augmented_AUG_2012.json':
  827. pd.Period(year=2012, month=8, freq='M'),
  828. 'out_augmented_MAY_2011.json':
  829. pd.Period(year=2011, month=5, freq='M'),
  830. 'out_augmented_SEP_2013.json':
  831. pd.Period(year=2013, month=9, freq='M')}}
  832. df = pd.DataFrame(['out_augmented_AUG_2012.json',
  833. 'out_augmented_SEP_2013.json',
  834. 'out_augmented_SUBSIDY_WEEK.json',
  835. 'out_augmented_MAY_2012.json',
  836. 'out_augmented_MAY_2011.json',
  837. 'out_augmented_AUG_2011.json',
  838. 'out_augmented_JAN_2011.json'], columns=['fname'])
  839. assert set(df.fname.values) == set(d['fname'].keys())
  840. # We don't support converting object -> specialized EA in
  841. # replace yet.
  842. expected = DataFrame({'fname': [d['fname'][k]
  843. for k in df.fname.values]},
  844. dtype=object)
  845. result = df.replace(d)
  846. assert_frame_equal(result, expected)
  847. def test_replace_datetime(self):
  848. d = {'fname':
  849. {'out_augmented_AUG_2011.json': pd.Timestamp('2011-08'),
  850. 'out_augmented_JAN_2011.json': pd.Timestamp('2011-01'),
  851. 'out_augmented_MAY_2012.json': pd.Timestamp('2012-05'),
  852. 'out_augmented_SUBSIDY_WEEK.json': pd.Timestamp('2011-04'),
  853. 'out_augmented_AUG_2012.json': pd.Timestamp('2012-08'),
  854. 'out_augmented_MAY_2011.json': pd.Timestamp('2011-05'),
  855. 'out_augmented_SEP_2013.json': pd.Timestamp('2013-09')}}
  856. df = pd.DataFrame(['out_augmented_AUG_2012.json',
  857. 'out_augmented_SEP_2013.json',
  858. 'out_augmented_SUBSIDY_WEEK.json',
  859. 'out_augmented_MAY_2012.json',
  860. 'out_augmented_MAY_2011.json',
  861. 'out_augmented_AUG_2011.json',
  862. 'out_augmented_JAN_2011.json'], columns=['fname'])
  863. assert set(df.fname.values) == set(d['fname'].keys())
  864. expected = DataFrame({'fname': [d['fname'][k]
  865. for k in df.fname.values]})
  866. result = df.replace(d)
  867. assert_frame_equal(result, expected)
  868. def test_replace_datetimetz(self):
  869. # GH 11326
  870. # behaving poorly when presented with a datetime64[ns, tz]
  871. df = DataFrame({'A': date_range('20130101', periods=3,
  872. tz='US/Eastern'),
  873. 'B': [0, np.nan, 2]})
  874. result = df.replace(np.nan, 1)
  875. expected = DataFrame({'A': date_range('20130101', periods=3,
  876. tz='US/Eastern'),
  877. 'B': Series([0, 1, 2], dtype='float64')})
  878. assert_frame_equal(result, expected)
  879. result = df.fillna(1)
  880. assert_frame_equal(result, expected)
  881. result = df.replace(0, np.nan)
  882. expected = DataFrame({'A': date_range('20130101', periods=3,
  883. tz='US/Eastern'),
  884. 'B': [np.nan, np.nan, 2]})
  885. assert_frame_equal(result, expected)
  886. result = df.replace(Timestamp('20130102', tz='US/Eastern'),
  887. Timestamp('20130104', tz='US/Eastern'))
  888. expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
  889. Timestamp('20130104', tz='US/Eastern'),
  890. Timestamp('20130103', tz='US/Eastern')],
  891. 'B': [0, np.nan, 2]})
  892. assert_frame_equal(result, expected)
  893. result = df.copy()
  894. result.iloc[1, 0] = np.nan
  895. result = result.replace(
  896. {'A': pd.NaT}, Timestamp('20130104', tz='US/Eastern'))
  897. assert_frame_equal(result, expected)
  898. # coerce to object
  899. result = df.copy()
  900. result.iloc[1, 0] = np.nan
  901. result = result.replace(
  902. {'A': pd.NaT}, Timestamp('20130104', tz='US/Pacific'))
  903. expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
  904. Timestamp('20130104', tz='US/Pacific'),
  905. Timestamp('20130103', tz='US/Eastern')],
  906. 'B': [0, np.nan, 2]})
  907. assert_frame_equal(result, expected)
  908. result = df.copy()
  909. result.iloc[1, 0] = np.nan
  910. result = result.replace({'A': np.nan}, Timestamp('20130104'))
  911. expected = DataFrame({'A': [Timestamp('20130101', tz='US/Eastern'),
  912. Timestamp('20130104'),
  913. Timestamp('20130103', tz='US/Eastern')],
  914. 'B': [0, np.nan, 2]})
  915. assert_frame_equal(result, expected)
  916. def test_replace_with_empty_dictlike(self):
  917. # GH 15289
  918. mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}
  919. df = DataFrame(mix)
  920. assert_frame_equal(df, df.replace({}))
  921. assert_frame_equal(df, df.replace(Series([])))
  922. assert_frame_equal(df, df.replace({'b': {}}))
  923. assert_frame_equal(df, df.replace(Series({'b': {}})))
  @pytest.mark.parametrize("to_replace, method, expected", [
      (0, 'bfill',
       {'A': [1, 1, 2], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
      (np.nan, 'bfill',
       {'A': [0, 1, 2], 'B': [5.0, 7.0, 7.0], 'C': ['a', 'b', 'c']}),
      ('d', 'ffill',
       {'A': [0, 1, 2], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
      ([0, 2], 'bfill',
       {'A': [1, 1, 2], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
      ([1, 2], 'pad',
       {'A': [0, 0, 0], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
      ((1, 2), 'bfill',
       {'A': [0, 2, 2], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
      (['b', 'c'], 'ffill',
       {'A': [0, 1, 2], 'B': [5, np.nan, 7], 'C': ['a', 'b', 'c']}),
  ])
  def test_replace_method(self, to_replace, method, expected):
      """Replace matches using a fill ``method`` with ``value=None``.

      ``expected`` arrives as a plain dict of columns and is wrapped in a
      ``DataFrame`` for the comparison.
      """
      # GH 19632
      frame = DataFrame({'A': [0, 1, 2],
                         'B': [5, np.nan, 7],
                         'C': ['a', 'b', 'c']})
      filled = frame.replace(to_replace=to_replace, value=None,
                             method=method)
      assert_frame_equal(filled, DataFrame(expected))