
/statsmodels/sandbox/examples/thirdparty/try_interchange.py

http://github.com/statsmodels/statsmodels
# -*- coding: utf-8 -*-
"""groupmean, groupby in pandas, la and tabular from a scikits.timeseries time_series

After a question on the scipy-user mailing list I tried to do group means,
which in this case are means over duplicate dates, in the three packages.
I'm using the versions that I had installed, which are all based on
repository checkouts but are not fully up to date.

Some brief comments:

* la.larry and pandas.DataFrame require unique labels/index, so the
  groups have to be represented in a separate data structure
* pandas is missing GroupBy in the docs, but the docstring is helpful
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to a structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular
* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange

import numpy as np
import scikits.timeseries as ts
import la
import pandas
import tabular as tb

from finance import msft, ibm  # hack to make it run as standalone

s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(["2001-01", "2001-01",
                                        "2001-02", "2001-03", "2001-03"],
                                       freq="M"))

print('\nUsing la')
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
pdta = pandas.DataFrame(s.data, np.arange(len(s.data)), [1])
pa = pdta.groupby(dict(zip(np.arange(len(s.data)),
                           s.dates.tolist()))).aggregate(np.mean)
s3 = ts.time_series(pa.values.ravel(),
                    dates=ts.date_array(pa.index.tolist(), freq="M"))
print(pa)
print(repr(s3))
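
# Added sketch: in later pandas versions a DataFrame index may contain
# duplicate labels, so the label dict used above is not needed -- grouping by
# the index level directly yields the same duplicate-date means. This relies
# on newer pandas behaviour, not on the API available when this script was
# written.
pdta2 = pandas.DataFrame({'data': s.data}, index=s.dates.tolist())
pa2 = pdta2.groupby(level=0).mean()
print(pa2)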

print('\nUsing tabular')
X = tb.tabarray(array=s.torecords(), dtype=s.torecords().dtype)
tabx = X.aggregate(On=['_dates'],
                   AggFuncDict={'_data': np.mean, '_mask': np.all})
s4 = ts.time_series(tabx['_data'],
                    dates=ts.date_array(tabx['_dates'], freq="M"))
print(tabx)
print(repr(s4))
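
# Added sketch: the same record-array aggregation expressed with pandas,
# assuming DataFrame.from_records accepts the recarray returned by
# s.torecords(). The per-column aggregation functions are passed to
# GroupBy.agg as a dict, mirroring tabular's AggFuncDict.
recs = pandas.DataFrame.from_records(s.torecords())
tabx2 = recs.groupby('_dates').agg({'_data': 'mean', '_mask': 'all'})
print(tabx2)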

# after running pandas/examples/finance.py
larmsft = la.larry(msft.values, [msft.index.tolist(), msft.columns.tolist()])
laribm = la.larry(ibm.values, [ibm.index.tolist(), ibm.columns.tolist()])
lar1 = la.larry(np.dstack((msft.values, ibm.values)),
                [ibm.index.tolist(), ibm.columns.tolist(), ['msft', 'ibm']])
print(lar1.mean(0))
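
# Added sketch: the same cross-sectional mean computed directly with NumPy on
# the stacked values (reusing msft and ibm imported above); axis 0 runs over
# the dates, matching lar1.mean(0).
stacked = np.dstack((msft.values, ibm.values))
print(stacked.mean(axis=0))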

y = la.larry([[1.0, 2.0], [3.0, 4.0]], [['a', 'b'], ['c', 'd']])
# np.float was removed from NumPy; the builtin float is the same dtype
ysr = np.empty(y.x.shape[0],
               dtype=([('index', 'S1')] + [(i, float) for i in y.label[1]]))
ysr['index'] = y.label[0]
for i in ysr.dtype.names[1:]:
    ysr[i] = y[y.labelindex(i, axis=1)].x
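
# Added sketch: an alternative route from the larry to a structured array via
# pandas -- build a DataFrame from the larry's data and labels, then let
# to_records() turn the row labels into an 'index' field.
ydf = pandas.DataFrame(y.x, index=y.label[0], columns=y.label[1])
print(ydf.to_records())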