PageRenderTime 58ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/io/tests/json/test_json_norm.py

http://github.com/wesm/pandas
Python | 230 lines | 183 code | 46 blank | 1 comment | 7 complexity | 05f692de09335d8a2c0e6c1bacb987ed MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import nose
  2. from pandas import DataFrame
  3. import numpy as np
  4. import json
  5. import pandas.util.testing as tm
  6. from pandas import compat
  7. from pandas.io.json import json_normalize, nested_to_record
  def _assert_equal_data(left, right):
      """Assert that two frames hold equal data, ignoring column order.

      When ``left`` and ``right`` do not already share the same columns
      index, ``left`` is reindexed to ``right.columns`` before the
      comparison, which is delegated to ``tm.assert_frame_equal``.
      """
      if left.columns.equals(right.columns):
          aligned = left
      else:
          # Bring ``left`` into ``right``'s column layout first.
          aligned = left.reindex(columns=right.columns)
      tm.assert_frame_equal(aligned, right)
  class TestJSONNormalize(tm.TestCase):
      """Checks ``json_normalize`` against hand-built expected frames."""

      def setUp(self):
          """Create ``self.state_data``: two state records with nested counties."""
          florida = {
              'counties': [
                  {'name': 'Dade', 'population': 12345},
                  {'name': 'Broward', 'population': 40000},
                  {'name': 'Palm Beach', 'population': 60000},
              ],
              'info': {'governor': 'Rick Scott'},
              'shortname': 'FL',
              'state': 'Florida',
          }
          ohio = {
              'counties': [
                  {'name': 'Summit', 'population': 1234},
                  {'name': 'Cuyahoga', 'population': 1337},
              ],
              'info': {'governor': 'John Kasich'},
              'shortname': 'OH',
              'state': 'Ohio',
          }
          self.state_data = [florida, ohio]

      def test_simple_records(self):
          """Flat records normalize to the same frame as ``DataFrame(recs)``."""
          recs = [{'a': start, 'b': start + 1, 'c': start + 2}
                  for start in (1, 4, 7, 10)]

          result = json_normalize(recs)
          expected = DataFrame(recs)

          tm.assert_frame_equal(result, expected)

      def test_simple_normalize(self):
          """Normalize the ``counties`` record path, with and without meta."""
          first_state = self.state_data[0]

          # Single record: only that state's counties are expanded.
          result = json_normalize(first_state, 'counties')
          expected = DataFrame(first_state['counties'])
          tm.assert_frame_equal(result, expected)

          # List of records: counties of every state, in input order.
          result = json_normalize(self.state_data, 'counties')
          all_counties = [county
                          for rec in self.state_data
                          for county in rec['counties']]
          expected = DataFrame(all_counties)
          tm.assert_frame_equal(result, expected)

          # With meta='state' each county row also carries its state name
          # (three Florida counties followed by two Ohio counties).
          result = json_normalize(self.state_data, 'counties', meta='state')
          state_names = np.array(['Florida', 'Ohio'])
          expected['state'] = state_names.repeat([3, 2])
          tm.assert_frame_equal(result, expected)

      def test_more_deeply_nested(self):
          """Normalize a two-level record path with a nested meta path."""

          def city(name, pop):
              return {'name': name, 'pop': pop}

          usa = {
              'country': 'USA',
              'states': [
                  {'name': 'California',
                   'cities': [city('San Francisco', 12345),
                              city('Los Angeles', 12346)]},
                  {'name': 'Ohio',
                   'cities': [city('Columbus', 1234),
                              city('Cleveland', 1236)]},
              ],
          }
          germany = {
              'country': 'Germany',
              'states': [
                  {'name': 'Bayern',
                   'cities': [city('Munich', 12347)]},
                  {'name': 'Nordrhein-Westfalen',
                   'cities': [city('Duesseldorf', 1238),
                              city('Koeln', 1239)]},
              ],
          }
          data = [usa, germany]

          result = json_normalize(data, ['states', 'cities'],
                                  meta=['country', ['states', 'name']])

          ex_data = {
              'country': ['USA', 'USA', 'USA', 'USA',
                          'Germany', 'Germany', 'Germany'],
              'states.name': ['California', 'California', 'Ohio', 'Ohio',
                              'Bayern', 'Nordrhein-Westfalen',
                              'Nordrhein-Westfalen'],
              'name': ['San Francisco', 'Los Angeles', 'Columbus',
                       'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
              'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239],
          }
          # The expected frame borrows the result's column order, so only
          # the column contents are compared, not their ordering.
          expected = DataFrame(ex_data, columns=result.columns)
          tm.assert_frame_equal(result, expected)

      def test_shallow_nested(self):
          """Normalize ``counties`` with flat and one-level-nested meta."""
          florida = {
              'state': 'Florida',
              'shortname': 'FL',
              'info': {'governor': 'Rick Scott'},
              'counties': [
                  {'name': 'Dade', 'population': 12345},
                  {'name': 'Broward', 'population': 40000},
                  {'name': 'Palm Beach', 'population': 60000},
              ],
          }
          ohio = {
              'state': 'Ohio',
              'shortname': 'OH',
              'info': {'governor': 'John Kasich'},
              'counties': [
                  {'name': 'Summit', 'population': 1234},
                  {'name': 'Cuyahoga', 'population': 1337},
              ],
          }
          data = [florida, ohio]

          meta_paths = ['state', 'shortname', ['info', 'governor']]
          result = json_normalize(data, 'counties', meta_paths)

          ex_data = {
              'name': ['Dade', 'Broward', 'Palm Beach', 'Summit', 'Cuyahoga'],
              'state': ['Florida', 'Florida', 'Florida', 'Ohio', 'Ohio'],
              'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
              'info.governor': ['Rick Scott', 'Rick Scott', 'Rick Scott',
                                'John Kasich', 'John Kasich'],
              'population': [12345, 40000, 60000, 1234, 1337],
          }
          # Column order is taken from the result; only contents are checked.
          expected = DataFrame(ex_data, columns=result.columns)
          tm.assert_frame_equal(result, expected)

      def test_meta_name_conflict(self):
          """Clashing meta/record names raise unless ``meta_prefix`` is given."""
          inner_records = [{'foo': 'something', 'bar': 'else'},
                           {'foo': 'something2', 'bar': 'else2'}]
          data = [{'foo': 'hello', 'bar': 'there', 'data': inner_records}]

          # 'foo' and 'bar' exist both as meta fields and as record fields.
          with self.assertRaises(ValueError):
              json_normalize(data, 'data', meta=['foo', 'bar'])

          result = json_normalize(data, 'data', meta=['foo', 'bar'],
                                  meta_prefix='meta')

          for column in ('metafoo', 'metabar', 'foo', 'bar'):
              self.assertTrue(column in result)

      def test_record_prefix(self):
          """``record_prefix`` is prepended to record columns, not to meta."""
          first_state = self.state_data[0]

          result = json_normalize(first_state, 'counties')
          expected = DataFrame(first_state['counties'])
          tm.assert_frame_equal(result, expected)

          result = json_normalize(self.state_data, 'counties',
                                  meta='state',
                                  record_prefix='county_')

          def with_prefix(label):
              return 'county_' + label

          all_counties = [county
                          for rec in self.state_data
                          for county in rec['counties']]
          expected = DataFrame(all_counties).rename(columns=with_prefix)
          # The meta column is added after renaming, so it stays unprefixed.
          state_names = np.array(['Florida', 'Ohio'])
          expected['state'] = state_names.repeat([3, 2])
          tm.assert_frame_equal(result, expected)

      def test_non_ascii_key(self):
          """Keys containing non-ASCII characters survive normalization."""
          if not compat.PY3:
              # On Python 2 the plain literal is passed to json.loads as-is.
              testjson = ('[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
                          '{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]')
          else:
              # On Python 3 the UTF-8 bytes are decoded to text first.
              first_half = b'[{"\xc3\x9cnic\xc3\xb8de":0,"sub":{"A":1, "B":2}},'
              second_half = b'{"\xc3\x9cnic\xc3\xb8de":1,"sub":{"A":3, "B":4}}]'
              testjson = (first_half + second_half).decode('utf8')

          non_ascii_key = b"\xc3\x9cnic\xc3\xb8de".decode('utf8')
          testdata = {
              u'sub.A': [1, 3],
              u'sub.B': [2, 4],
              non_ascii_key: [0, 1],
          }
          expected = DataFrame(testdata)

          parsed = json.loads(testjson)
          result = json_normalize(parsed)
          tm.assert_frame_equal(result, expected)
  class TestNestedToRecord(tm.TestCase):
      """Checks how ``nested_to_record`` flattens nested dictionaries."""

      def test_flat_stays_flat(self):
          """A list of already-flat dicts compares equal to the input."""
          recs = [{'flat1': 1, 'flat2': 2},
                  {'flat1': 3, 'flat2': 4}]

          result = nested_to_record(recs)

          # Compared against the very same list object that was passed in.
          self.assertEqual(result, recs)

      def test_one_level_deep_flattens(self):
          """One nested dict is flattened into dotted ``parent.child`` keys."""
          data = {'flat1': 1,
                  'dict1': {'c': 1, 'd': 2}}

          result = nested_to_record(data)

          expected = {
              'dict1.c': 1,
              'dict1.d': 2,
              'flat1': 1,
          }
          self.assertEqual(result, expected)

      def test_nested_flattens(self):
          """Two levels of nesting flatten into ``a.b.c`` style keys."""
          inner = {'c': 1, 'd': 2}
          data = {'flat1': 1,
                  'dict1': {'c': 1, 'd': 2},
                  'nested': {'e': inner, 'd': 2}}

          result = nested_to_record(data)

          expected = {
              'dict1.c': 1,
              'dict1.d': 2,
              'flat1': 1,
              'nested.d': 2,
              'nested.e.c': 1,
              'nested.e.d': 2,
          }
          self.assertEqual(result, expected)
  if __name__ == '__main__':
      # When executed directly, hand this file to nose's module runner.
      # exit=False is passed through to runmodule together with the
      # verbosity, stop-on-first-failure and pdb flags listed in argv.
      nose.runmodule(
          argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s'],
          exit=False,
      )