PageRenderTime 53ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/sparse/list.py

http://github.com/pydata/pandas
Python | 142 lines | 71 code | 26 blank | 45 comment | 13 complexity | f7253b07719304cb54c0146a1e094fb8 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import numpy as np
  2. from pandas.core.base import PandasObject
  3. from pandas.core.common import pprint_thing
  4. from pandas.sparse.array import SparseArray
  5. import pandas._sparse as splib
  6. class SparseList(PandasObject):
  7. """
  8. Data structure for accumulating data to be converted into a
  9. SparseArray. Has similar API to the standard Python list
  10. Parameters
  11. ----------
  12. data : scalar or array-like
  13. fill_value : scalar, default NaN
  14. """
  15. def __init__(self, data=None, fill_value=np.nan):
  16. self.fill_value = fill_value
  17. self._chunks = []
  18. if data is not None:
  19. self.append(data)
  20. def __unicode__(self):
  21. contents = '\n'.join(repr(c) for c in self._chunks)
  22. return '%s\n%s' % (object.__repr__(self), pprint_thing(contents))
  23. def __len__(self):
  24. return sum(len(c) for c in self._chunks)
  25. def __getitem__(self, i):
  26. if i < 0:
  27. if i + len(self) < 0: # pragma: no cover
  28. raise ValueError('%d out of range' % i)
  29. i += len(self)
  30. passed = 0
  31. j = 0
  32. while i >= passed + len(self._chunks[j]):
  33. passed += len(self._chunks[j])
  34. j += 1
  35. return self._chunks[j][i - passed]
  36. def __setitem__(self, i, value):
  37. raise NotImplementedError
  38. @property
  39. def nchunks(self):
  40. return len(self._chunks)
  41. @property
  42. def is_consolidated(self):
  43. return self.nchunks == 1
  44. def consolidate(self, inplace=True):
  45. """
  46. Internally consolidate chunks of data
  47. Parameters
  48. ----------
  49. inplace : boolean, default True
  50. Modify the calling object instead of constructing a new one
  51. Returns
  52. -------
  53. splist : SparseList
  54. If inplace=False, new object, otherwise reference to existing
  55. object
  56. """
  57. if not inplace:
  58. result = self.copy()
  59. else:
  60. result = self
  61. if result.is_consolidated:
  62. return result
  63. result._consolidate_inplace()
  64. return result
  65. def _consolidate_inplace(self):
  66. new_values = np.concatenate([c.sp_values for c in self._chunks])
  67. new_index = _concat_sparse_indexes([c.sp_index for c in self._chunks])
  68. new_arr = SparseArray(new_values, sparse_index=new_index,
  69. fill_value=self.fill_value)
  70. self._chunks = [new_arr]
  71. def copy(self):
  72. """
  73. Return copy of the list
  74. Returns
  75. -------
  76. new_list : SparseList
  77. """
  78. new_splist = SparseList(fill_value=self.fill_value)
  79. new_splist._chunks = list(self._chunks)
  80. return new_splist
  81. def to_array(self):
  82. """
  83. Return SparseArray from data stored in the SparseList
  84. Returns
  85. -------
  86. sparr : SparseArray
  87. """
  88. self.consolidate(inplace=True)
  89. return self._chunks[0]
  90. def append(self, value):
  91. """
  92. Append element or array-like chunk of data to the SparseList
  93. Parameters
  94. ----------
  95. value: scalar or array-like
  96. """
  97. if np.isscalar(value):
  98. value = [value]
  99. sparr = SparseArray(value, fill_value=self.fill_value)
  100. self._chunks.append(sparr)
  101. self._consolidated = False
  102. def _concat_sparse_indexes(indexes):
  103. all_indices = []
  104. total_length = 0
  105. for index in indexes:
  106. # increment by offset
  107. inds = index.to_int_index().indices + total_length
  108. all_indices.append(inds)
  109. total_length += index.length
  110. return splib.IntIndex(total_length, np.concatenate(all_indices))