PageRenderTime 47ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/io/tests/test_json_norm.py

http://github.com/pydata/pandas
Python | 207 lines | 164 code | 42 blank | 1 comment | 5 complexity | 1ef44b7086c7827249a0ff53b03c8dff MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import nose
  2. from pandas import DataFrame
  3. import numpy as np
  4. import pandas.util.testing as tm
  5. from pandas.io.json import json_normalize, nested_to_record
  6. def _assert_equal_data(left, right):
  7. if not left.columns.equals(right.columns):
  8. left = left.reindex(columns=right.columns)
  9. tm.assert_frame_equal(left, right)
  10. class TestJSONNormalize(tm.TestCase):
  11. def setUp(self):
  12. self.state_data = [
  13. {'counties': [{'name': 'Dade', 'population': 12345},
  14. {'name': 'Broward', 'population': 40000},
  15. {'name': 'Palm Beach', 'population': 60000}],
  16. 'info': {'governor': 'Rick Scott'},
  17. 'shortname': 'FL',
  18. 'state': 'Florida'},
  19. {'counties': [{'name': 'Summit', 'population': 1234},
  20. {'name': 'Cuyahoga', 'population': 1337}],
  21. 'info': {'governor': 'John Kasich'},
  22. 'shortname': 'OH',
  23. 'state': 'Ohio'}]
  24. def test_simple_records(self):
  25. recs = [{'a': 1, 'b': 2, 'c': 3},
  26. {'a': 4, 'b': 5, 'c': 6},
  27. {'a': 7, 'b': 8, 'c': 9},
  28. {'a': 10, 'b': 11, 'c': 12}]
  29. result = json_normalize(recs)
  30. expected = DataFrame(recs)
  31. tm.assert_frame_equal(result, expected)
  32. def test_simple_normalize(self):
  33. result = json_normalize(self.state_data[0], 'counties')
  34. expected = DataFrame(self.state_data[0]['counties'])
  35. tm.assert_frame_equal(result, expected)
  36. result = json_normalize(self.state_data, 'counties')
  37. expected = []
  38. for rec in self.state_data:
  39. expected.extend(rec['counties'])
  40. expected = DataFrame(expected)
  41. tm.assert_frame_equal(result, expected)
  42. result = json_normalize(self.state_data, 'counties', meta='state')
  43. expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
  44. tm.assert_frame_equal(result, expected)
  45. def test_more_deeply_nested(self):
  46. data = [{'country': 'USA',
  47. 'states': [{'name': 'California',
  48. 'cities': [{'name': 'San Francisco',
  49. 'pop': 12345},
  50. {'name': 'Los Angeles',
  51. 'pop': 12346}]
  52. },
  53. {'name': 'Ohio',
  54. 'cities': [{'name': 'Columbus',
  55. 'pop': 1234},
  56. {'name': 'Cleveland',
  57. 'pop': 1236}]}
  58. ]
  59. },
  60. {'country': 'Germany',
  61. 'states': [{'name': 'Bayern',
  62. 'cities': [{'name': 'Munich', 'pop': 12347}]
  63. },
  64. {'name': 'Nordrhein-Westfalen',
  65. 'cities': [{'name': 'Duesseldorf', 'pop': 1238},
  66. {'name': 'Koeln', 'pop': 1239}]}
  67. ]
  68. }
  69. ]
  70. result = json_normalize(data, ['states', 'cities'],
  71. meta=['country', ['states', 'name']])
  72. # meta_prefix={'states': 'state_'})
  73. ex_data = {'country': ['USA'] * 4 + ['Germany'] * 3,
  74. 'states.name': ['California', 'California', 'Ohio', 'Ohio',
  75. 'Bayern', 'Nordrhein-Westfalen',
  76. 'Nordrhein-Westfalen'],
  77. 'name': ['San Francisco', 'Los Angeles', 'Columbus',
  78. 'Cleveland', 'Munich', 'Duesseldorf', 'Koeln'],
  79. 'pop': [12345, 12346, 1234, 1236, 12347, 1238, 1239]}
  80. expected = DataFrame(ex_data, columns=result.columns)
  81. tm.assert_frame_equal(result, expected)
  82. def test_shallow_nested(self):
  83. data = [{'state': 'Florida',
  84. 'shortname': 'FL',
  85. 'info': {
  86. 'governor': 'Rick Scott'
  87. },
  88. 'counties': [{'name': 'Dade', 'population': 12345},
  89. {'name': 'Broward', 'population': 40000},
  90. {'name': 'Palm Beach', 'population': 60000}]},
  91. {'state': 'Ohio',
  92. 'shortname': 'OH',
  93. 'info': {
  94. 'governor': 'John Kasich'
  95. },
  96. 'counties': [{'name': 'Summit', 'population': 1234},
  97. {'name': 'Cuyahoga', 'population': 1337}]}]
  98. result = json_normalize(data, 'counties',
  99. ['state', 'shortname',
  100. ['info', 'governor']])
  101. ex_data = {'name': ['Dade', 'Broward', 'Palm Beach', 'Summit',
  102. 'Cuyahoga'],
  103. 'state': ['Florida'] * 3 + ['Ohio'] * 2,
  104. 'shortname': ['FL', 'FL', 'FL', 'OH', 'OH'],
  105. 'info.governor': ['Rick Scott'] * 3 + ['John Kasich'] * 2,
  106. 'population': [12345, 40000, 60000, 1234, 1337]}
  107. expected = DataFrame(ex_data, columns=result.columns)
  108. tm.assert_frame_equal(result, expected)
  109. def test_meta_name_conflict(self):
  110. data = [{'foo': 'hello',
  111. 'bar': 'there',
  112. 'data': [{'foo': 'something', 'bar': 'else'},
  113. {'foo': 'something2', 'bar': 'else2'}]}]
  114. self.assertRaises(ValueError, json_normalize, data,
  115. 'data', meta=['foo', 'bar'])
  116. result = json_normalize(data, 'data', meta=['foo', 'bar'],
  117. meta_prefix='meta')
  118. for val in ['metafoo', 'metabar', 'foo', 'bar']:
  119. self.assertTrue(val in result)
  120. def test_record_prefix(self):
  121. result = json_normalize(self.state_data[0], 'counties')
  122. expected = DataFrame(self.state_data[0]['counties'])
  123. tm.assert_frame_equal(result, expected)
  124. result = json_normalize(self.state_data, 'counties',
  125. meta='state',
  126. record_prefix='county_')
  127. expected = []
  128. for rec in self.state_data:
  129. expected.extend(rec['counties'])
  130. expected = DataFrame(expected)
  131. expected = expected.rename(columns=lambda x: 'county_' + x)
  132. expected['state'] = np.array(['Florida', 'Ohio']).repeat([3, 2])
  133. tm.assert_frame_equal(result, expected)
  134. class TestNestedToRecord(tm.TestCase):
  135. def test_flat_stays_flat(self):
  136. recs = [dict(flat1=1,flat2=2),
  137. dict(flat1=3,flat2=4),
  138. ]
  139. result = nested_to_record(recs)
  140. expected = recs
  141. self.assertEqual(result, expected)
  142. def test_one_level_deep_flattens(self):
  143. data = dict(flat1=1,
  144. dict1=dict(c=1,d=2))
  145. result = nested_to_record(data)
  146. expected = {'dict1.c': 1,
  147. 'dict1.d': 2,
  148. 'flat1': 1}
  149. self.assertEqual(result,expected)
  150. def test_nested_flattens(self):
  151. data = dict(flat1=1,
  152. dict1=dict(c=1,d=2),
  153. nested=dict(e=dict(c=1,d=2),
  154. d=2))
  155. result = nested_to_record(data)
  156. expected = {'dict1.c': 1,
  157. 'dict1.d': 2,
  158. 'flat1': 1,
  159. 'nested.d': 2,
  160. 'nested.e.c': 1,
  161. 'nested.e.d': 2}
  162. self.assertEqual(result,expected)
  163. if __name__ == '__main__':
  164. nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
  165. '--pdb-failure', '-s'], exit=False)