PageRenderTime 39ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/sparse/list.py

http://github.com/wesm/pandas
Python | 143 lines | 72 code | 26 blank | 45 comment | 13 complexity | 98857cf2b619547b267cde22ee44a502 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import numpy as np
  2. from pandas.core.base import PandasObject
  3. from pandas.formats.printing import pprint_thing
  4. from pandas.types.common import is_scalar
  5. from pandas.sparse.array import SparseArray
  6. import pandas._sparse as splib
  7. class SparseList(PandasObject):
  8. """
  9. Data structure for accumulating data to be converted into a
  10. SparseArray. Has similar API to the standard Python list
  11. Parameters
  12. ----------
  13. data : scalar or array-like
  14. fill_value : scalar, default NaN
  15. """
  16. def __init__(self, data=None, fill_value=np.nan):
  17. self.fill_value = fill_value
  18. self._chunks = []
  19. if data is not None:
  20. self.append(data)
  21. def __unicode__(self):
  22. contents = '\n'.join(repr(c) for c in self._chunks)
  23. return '%s\n%s' % (object.__repr__(self), pprint_thing(contents))
  24. def __len__(self):
  25. return sum(len(c) for c in self._chunks)
  26. def __getitem__(self, i):
  27. if i < 0:
  28. if i + len(self) < 0: # pragma: no cover
  29. raise ValueError('%d out of range' % i)
  30. i += len(self)
  31. passed = 0
  32. j = 0
  33. while i >= passed + len(self._chunks[j]):
  34. passed += len(self._chunks[j])
  35. j += 1
  36. return self._chunks[j][i - passed]
  37. def __setitem__(self, i, value):
  38. raise NotImplementedError
  39. @property
  40. def nchunks(self):
  41. return len(self._chunks)
  42. @property
  43. def is_consolidated(self):
  44. return self.nchunks == 1
  45. def consolidate(self, inplace=True):
  46. """
  47. Internally consolidate chunks of data
  48. Parameters
  49. ----------
  50. inplace : boolean, default True
  51. Modify the calling object instead of constructing a new one
  52. Returns
  53. -------
  54. splist : SparseList
  55. If inplace=False, new object, otherwise reference to existing
  56. object
  57. """
  58. if not inplace:
  59. result = self.copy()
  60. else:
  61. result = self
  62. if result.is_consolidated:
  63. return result
  64. result._consolidate_inplace()
  65. return result
  66. def _consolidate_inplace(self):
  67. new_values = np.concatenate([c.sp_values for c in self._chunks])
  68. new_index = _concat_sparse_indexes([c.sp_index for c in self._chunks])
  69. new_arr = SparseArray(new_values, sparse_index=new_index,
  70. fill_value=self.fill_value)
  71. self._chunks = [new_arr]
  72. def copy(self):
  73. """
  74. Return copy of the list
  75. Returns
  76. -------
  77. new_list : SparseList
  78. """
  79. new_splist = SparseList(fill_value=self.fill_value)
  80. new_splist._chunks = list(self._chunks)
  81. return new_splist
  82. def to_array(self):
  83. """
  84. Return SparseArray from data stored in the SparseList
  85. Returns
  86. -------
  87. sparr : SparseArray
  88. """
  89. self.consolidate(inplace=True)
  90. return self._chunks[0]
  91. def append(self, value):
  92. """
  93. Append element or array-like chunk of data to the SparseList
  94. Parameters
  95. ----------
  96. value: scalar or array-like
  97. """
  98. if is_scalar(value):
  99. value = [value]
  100. sparr = SparseArray(value, fill_value=self.fill_value)
  101. self._chunks.append(sparr)
  102. self._consolidated = False
  103. def _concat_sparse_indexes(indexes):
  104. all_indices = []
  105. total_length = 0
  106. for index in indexes:
  107. # increment by offset
  108. inds = index.to_int_index().indices + total_length
  109. all_indices.append(inds)
  110. total_length += index.length
  111. return splib.IntIndex(total_length, np.concatenate(all_indices))