PageRenderTime 48ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/pandas/io/tests/sas/test_sas7bdat.py

http://github.com/wesm/pandas
Python | 117 lines | 102 code | 15 blank | 0 comment | 18 complexity | 9b6bbdab2c3606ff00c5d7d93083a59b MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. import pandas as pd
  2. from pandas.compat import PY2
  3. import pandas.util.testing as tm
  4. import os
  5. import io
  6. import numpy as np
  class TestSAS7BDAT(tm.TestCase):
      """Check ``pd.read_sas`` on the ``test<k>.sas7bdat`` data files.

      Each file is read from a path, from an in-memory buffer and through an
      iterator, and the result is compared with a reference frame built in
      ``setUp`` from the matching ``test_sas7bdat_<j>.csv`` file.
      """

      @staticmethod
      def _decode_py2_str(value):
          """Decode a ``str`` value as UTF-8; return anything else unchanged.

          Only called when ``PY2`` is true (see ``setUp``).
          """
          return value.decode('utf-8') if isinstance(value, str) else value

      def _sas_path(self, k):
          """Return the path of ``test<k>.sas7bdat`` inside ``self.dirpath``."""
          return os.path.join(self.dirpath, "test%d.sas7bdat" % k)

      def _sas_buffer(self, k):
          """Return the content of ``test<k>.sas7bdat`` as an ``io.BytesIO``."""
          with open(self._sas_path(k), 'rb') as f:
              return io.BytesIO(f.read())

      def setUp(self):
          """Load the CSV reference frames and normalise their dtypes.

          Sets ``self.dirpath``, ``self.data`` (one frame per CSV file) and
          ``self.test_ix`` (for each frame, the numbers ``k`` of the
          ``test<k>.sas7bdat`` files that are compared against it).
          """
          self.dirpath = tm.get_data_path()
          self.data = []
          self.test_ix = [list(range(1, 16)), [16]]
          # Loop-invariant reference date for the day offsets converted below.
          epoch = pd.datetime(1960, 1, 1)
          for j in 1, 2:
              fname = os.path.join(self.dirpath, "test_sas7bdat_%d.csv" % j)
              df = pd.read_csv(fname)
              # Column4 and Column12 are interpreted as day counts and turned
              # into datetimes by adding them to the 1960-01-01 epoch.
              for name in "Column4", "Column12":
                  df[name] = epoch + pd.to_timedelta(df[name], unit='d')
              for k in range(df.shape[1]):
                  col = df.iloc[:, k]
                  if col.dtype == np.int64:
                      # Integer CSV columns are converted to float64 before
                      # the comparison with the frames read by read_sas.
                      df.iloc[:, k] = col.astype(np.float64)
                  elif col.dtype == np.dtype('O') and PY2:
                      # On Python 2 the object columns hold byte strings;
                      # decode them to unicode.
                      df.iloc[:, k] = col.apply(self._decode_py2_str)
              self.data.append(df)

      def test_from_file(self):
          """Read every file by path and compare with its reference frame."""
          for j in 0, 1:
              df0 = self.data[j]
              for k in self.test_ix[j]:
                  df = pd.read_sas(self._sas_path(k), encoding='utf-8')
                  tm.assert_frame_equal(df, df0)

      def test_from_buffer(self):
          """Read every file from a bytes buffer; the format is given
          explicitly because there is no file name to take it from."""
          for j in 0, 1:
              df0 = self.data[j]
              for k in self.test_ix[j]:
                  buf = self._sas_buffer(k)
                  df = pd.read_sas(buf, format="sas7bdat", encoding='utf-8')
                  tm.assert_frame_equal(df, df0, check_exact=False)

      def test_from_iterator(self):
          """Read the first five rows in two chunks (2 rows, then 3 rows)."""
          for j in 0, 1:
              df0 = self.data[j]
              for k in self.test_ix[j]:
                  buf = self._sas_buffer(k)
                  rdr = pd.read_sas(buf, format="sas7bdat",
                                    iterator=True, encoding='utf-8')
                  # Release the reader even when an assertion fails.
                  try:
                      df = rdr.read(2)
                      tm.assert_frame_equal(df, df0.iloc[0:2, :])
                      df = rdr.read(3)
                      tm.assert_frame_equal(df, df0.iloc[2:5, :])
                  finally:
                      rdr.close()
  def test_encoding_options():
      """Check the ``encoding`` and ``convert_header_text`` reader options.

      Reading ``test1.sas7bdat`` without an encoding and then decoding each
      column as UTF-8 must give the same frame as reading it with
      ``encoding='utf-8'``.  With ``convert_header_text=False`` the column
      names, once decoded, must equal the names of the decoded frame.
      """
      dirpath = tm.get_data_path()
      fname = os.path.join(dirpath, "test1.sas7bdat")
      df1 = pd.read_sas(fname)
      df2 = pd.read_sas(fname, encoding='utf-8')
      for col in df1.columns:
          try:
              df1[col] = df1[col].str.decode('utf-8')
          except AttributeError:
              # Deliberate best effort: columns for which ``.str`` raises
              # AttributeError (presumably the non-string ones) stay as-is.
              pass
      tm.assert_frame_equal(df1, df2)

      from pandas.io.sas.sas7bdat import SAS7BDATReader
      rdr = SAS7BDATReader(fname, convert_header_text=False)
      # Close the reader even if reading fails.
      try:
          df3 = rdr.read()
      finally:
          rdr.close()
      for x, y in zip(df1.columns, df3.columns):
          assert x == y.decode()
  def test_productsales():
      """Compare ``productsales.sas7bdat`` (read with ``encoding='utf-8'``)
      with ``productsales.csv`` after casting the numeric CSV columns to
      float64."""
      data_dir = tm.get_data_path()
      actual = pd.read_sas(os.path.join(data_dir, "productsales.sas7bdat"),
                           encoding='utf-8')
      expected = pd.read_csv(os.path.join(data_dir, "productsales.csv"))
      # These columns are cast to float64 so that they compare equal to the
      # frame returned by read_sas.
      float_columns = ["ACTUAL", "PREDICT", "QUARTER", "YEAR", "MONTH"]
      expected[float_columns] = expected[float_columns].astype(np.float64)
      tm.assert_frame_equal(actual, expected)
  def test_12659():
      """Compare ``test_12659.sas7bdat`` with ``test_12659.csv`` cast to
      float64.

      NOTE(review): the name presumably refers to an issue number; confirm.
      """
      data_dir = tm.get_data_path()
      actual = pd.read_sas(os.path.join(data_dir, "test_12659.sas7bdat"))
      reference = pd.read_csv(os.path.join(data_dir, "test_12659.csv"))
      expected = reference.astype(np.float64)
      tm.assert_frame_equal(actual, expected)
  def test_airline():
      """Compare ``airline.sas7bdat`` with ``airline.csv`` cast to float64,
      using a non-exact frame comparison."""
      data_dir = tm.get_data_path()
      actual = pd.read_sas(os.path.join(data_dir, "airline.sas7bdat"))
      reference = pd.read_csv(os.path.join(data_dir, "airline.csv"))
      expected = reference.astype(np.float64)
      tm.assert_frame_equal(actual, expected, check_exact=False)