PageRenderTime 47ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/bench/bench_tseries.py

http://github.com/wesm/pandas
Python | 176 lines | 129 code | 46 blank | 1 comment | 17 complexity | c8acbaa86e2bd7b42e389bdac68a2021 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import time
  2. import numpy as np
  3. from pandas import Series, Index, isnull
  4. import pandas.lib.tseries as tseries
  5. from pandas.util.testing import assert_almost_equal, assert_dict_equal
  6. def _timeit(f, n=10):
  7. _s = time.clock()
  8. for i in xrange(n):
  9. f()
  10. return (time.clock() - _s) / n
  11. def bench_reindex():
  12. K = 100000
  13. index = Index(np.arange(K))
  14. values = np.arange(float(K))
  15. obj_vals = values.astype(object)
  16. new_index = np.arange(K)
  17. np.random.shuffle(new_index)
  18. new_index = Index(new_index)
  19. f = lambda: tseries.reindex(new_index, values, index.indexMap)
  20. print 'tseries.reindex: %.2f ms per iteration' % (_timeit(f, n=50) * 1000)
  21. def _test():
  22. filler, mask = tseries.getMergeVec(new_index, index.indexMap)
  23. result = values.take(filler)
  24. np.putmask(result, -mask, np.NaN)
  25. return result
  26. timing = _timeit(_test, n=50) * 1000
  27. print 'getMergeVec method: %.2f ms per iteration' % timing
  28. f2 = lambda: tseries.reindexObj(new_index, values, index.indexMap)
  29. print ('tseries.reindexObj with floats: %.2f ms per iteration'
  30. % (_timeit(f2, n=50) * 1000))
  31. f3 = lambda: tseries.reindexObj(new_index, obj_vals, index.indexMap)
  32. print ('tseries.reindexObj with objects: %.2f ms per iteration'
  33. % (_timeit(f3, n=50) * 1000))
  34. f4 = lambda: tseries.reindexObject(new_index, obj_vals, index.indexMap)
  35. print ('tseries.reindexObject buffers: %.2f ms per iteration'
  36. % (_timeit(f4, n=50) * 1000))
  37. def _test2():
  38. filler, mask = tseries.getMergeVec(new_index, index.indexMap)
  39. result = obj_vals.take(filler)
  40. np.putmask(result, -mask, np.NaN)
  41. return result
  42. timing = _timeit(_test2, n=50) * 1000
  43. print 'getMergeVec method: %.2f ms per iteration' % timing
  44. assert_almost_equal(_test(), f())
  45. assert_almost_equal(f2(), f3())
  46. assert_almost_equal(f3(), f4())
  47. assert_almost_equal(f2(), f4())
  48. assert_almost_equal(f2(), _test2())
  49. def _isnan(obj):
  50. return obj != obj
  51. def test_groupby():
  52. mapping = Series({
  53. 1 : 2.,
  54. 2 : 2.,
  55. 3 : np.NaN,
  56. 4 : np.NaN,
  57. 5 : 3.,
  58. 6 : 3.,
  59. 7 : np.NaN
  60. })
  61. index = Index([1, 2, 3, 4, 5, 6, 7])
  62. expected = {
  63. 2 : [1, 2],
  64. 3 : [5, 6],
  65. np.NaN : [3, 4, 7]
  66. }
  67. def compare_with_null(d1, d2):
  68. d1_nulls = None
  69. d2_nulls = None
  70. for k, v in d1.iteritems():
  71. if _isnan(k):
  72. d1_nulls = v
  73. else:
  74. assert(k in d2)
  75. assert(np.array_equal(v, d2[k]))
  76. for k, v in d2.iteritems():
  77. if _isnan(k):
  78. d2_nulls = v
  79. else:
  80. assert(k in d1)
  81. if d1_nulls is not None or d2_nulls is not None:
  82. assert(np.array_equal(d1_nulls, d2_nulls))
  83. grouped = tseries.groupby(index, mapping.get)
  84. compare_with_null(grouped, expected)
  85. def groupby_nocython(index, mapper, output=None):
  86. if output is None:
  87. result = {}
  88. else:
  89. result = output
  90. index = np.asarray(index)
  91. mapped_index = np.array([mapper(x) for x in index])
  92. # A little hack here
  93. if issubclass(mapped_index.dtype.type, basestring):
  94. mapped_index = mapped_index.astype(object)
  95. mask = isnull(mapped_index)
  96. nullkeys = index[mask]
  97. if nullkeys is not None and len(nullkeys) > 0:
  98. result[np.NaN] = nullkeys
  99. notmask = -mask
  100. index = index[notmask]
  101. mapped_index = mapped_index[notmask]
  102. for idx, key in zip(index, mapped_index):
  103. result.setdefault(key, []).append(idx)
  104. return result
  105. def bench_groupby():
  106. N = 200
  107. arr = np.arange(10000).astype(object)
  108. values = np.random.randn(10000)
  109. keys = arr // 10
  110. d = dict(zip(arr, keys))
  111. f = lambda: groupby_nocython(arr, d.get)
  112. print 'no cython: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)
  113. f = lambda: tseries.arrmap(arr, d.get)
  114. timing = _timeit(f, n=N) * 1000
  115. print 'arrmap: %.2f ms per iteration' % timing
  116. f = lambda: isnull(tseries.arrmap(arr, d.get))
  117. print 'isnull: %.2f ms per iteration' % (_timeit(f, n=N) * 1000 - timing)
  118. f = lambda: tseries.groupby(arr, d.get)
  119. print 'groupby: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)
  120. f = lambda: tseries.groupby_indices(arr, d.get)
  121. print 'groupby_inds: %.2f ms per iteration' % (_timeit(f, n=N) * 1000)
  122. def _test():
  123. groups = tseries.groupby_indices(arr, d.get)
  124. result = {}
  125. for k, v in groups.iteritems():
  126. result[k] = np.mean(values.take(v))
  127. return result
  128. print 'test: %.2f ms per iteration' % (_timeit(_test, n=N) * 1000)
def bench_map_indices():
    """Placeholder: map_indices benchmark not yet implemented."""
    pass