PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/pandasreg/stats.py

https://github.com/abielr/pandasreg
Python | 133 lines | 101 code | 18 blank | 14 comment | 2 complexity | 088f18d13423b339968c511ef5ff8b61 MD5 | raw file
  1. import os
  2. import subprocess
  3. import uuid
  4. import glob
  5. import numpy as np
  6. import pandas as pd
  7. from pandasreg.rperiod import RPeriodIndex, RFrequency, RPeriod
  8. def d(series, n=1):
  9. """Difference over n periods"""
  10. return series-series.shift(n)
  11. def da(series, n=1):
  12. """Difference over n periods, annualized"""
  13. return (series-series.shift(n))*series.index.freq.periodicity
  14. def dy(series, n=1):
  15. """Difference over n years"""
  16. return (series-series.shift(n*series.index.freq.periodicity))
  17. def dya(series, n=1):
  18. """Difference over n years, annualized"""
  19. return (series-series.shift(n*series.index.freq.periodicity)) / n
  20. def logd(series, n=1):
  21. """Log difference over n periods"""
  22. return (np.log(series)-np.log(series.shift(n)))*100
  23. def logda(series, n=1):
  24. """Log difference over n periods, annualized"""
  25. return (np.log(series)-np.log(series.shift(n)))*series.index.freq.periodicity*100
  26. def logdy(series, n=1):
  27. """Log difference over n years"""
  28. return (np.log(series)-np.log(series.shift(n*series.index.freq.periodicity)))*100
  29. def logdya(series, n=1):
  30. """Log difference over n years, annualized"""
  31. return (np.log(series)-np.log(series.shift(n*series.index.freq.periodicity)))*100.0/n
  32. def pc(series, n=1):
  33. """Percent change over n periods"""
  34. return (series/series.shift(n)-1)*100
  35. def pca(series, n=1):
  36. """Percent change over n periods, annualized"""
  37. return ((series/series.shift(n))**(1.0*series.index.freq.periodicity/n)-1)*100
  38. def pcy(series, n=1):
  39. """Percent change over n years"""
  40. return (series/series.shift(n*series.index.freq.periodicity)-1)*100
  41. def pcya(series, n=1):
  42. """Percent change over n years, annualized"""
  43. return ((series/series.shift(n*series.index.freq.periodicity))**(1.0/n)-1)*100
  44. def x12(series, executable, tmpdir):
  45. """Run US Census Bureau's X-12 ARIMA on a function
  46. Arguments:
  47. series (pd.Series): Monthly or quarterly time series to seasonally
  48. adjust.
  49. executable (str): Path to the X-12 executable
  50. tmpdir (str): Path to a writable directory where X-12 input and output
  51. files can be stored temporarily.
  52. """
  53. if series.index.freq.freqstr == "M":
  54. if len(series.values) < 36:
  55. raise ValueError("Must have at least three years of data")
  56. start = series.index[0].strftime("%Y.%m")
  57. period = 12
  58. elif series.index.freq.freqstr.startswith("Q"):
  59. if len(series.values) < 12:
  60. raise ValueError("Must have at least three years of data")
  61. quarter = str((series.index[0].to_datetime().month-1)/3+1)
  62. start = series.index[0].strftime("%Y")+"."+quarter
  63. period = 4
  64. else:
  65. return series # Can only do adjustment on monthly and quarterly data with X-12
  66. template = """
  67. series{
  68. title="Test"
  69. start=%s
  70. period=%d
  71. data=(
  72. %s
  73. )
  74. span=(%s,)
  75. }
  76. x11{
  77. print=(d11)
  78. }
  79. """ % (start, period, "\n".join([str(x) for x in series.values]), start)
  80. spcpath = tmpdir
  81. spcname = str(uuid.uuid4())
  82. spcfile = spcpath+spcname+".spc"
  83. with open(spcfile, "w") as f:
  84. f.write(template)
  85. subprocess.call([executable, spcpath+spcname, "-Q", "-P", "-N", "-R"],
  86. stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
  87. with open(spcpath+spcname+".out") as f:
  88. divcount = 0
  89. line = f.readline()
  90. while divcount < 2:
  91. if line.startswith(" -----"):
  92. divcount += 1
  93. line = f.readline()
  94. values = []
  95. while line.find("AVGE") < 0:
  96. if line[2:6].isdigit():
  97. if series.index.freq.freqstr == "M":
  98. values += line.strip().split()[1:]
  99. line = f.readline()
  100. values += line.strip().split()[:-1]
  101. else: # if quarterly
  102. values += line.strip().split()[1:-1]
  103. line = f.readline()
  104. values = [float(value) for value in values]
  105. for filename in glob.glob(spcpath+"/"+spcname+"*"):
  106. os.remove(filename)
  107. return type(series)(values, index=series.index)