/pandas/io/tests/sas/test_sas7bdat.py

http://github.com/wesm/pandas · Python · 117 lines · 102 code · 15 blank · 0 comment · 18 complexity · 9b6bbdab2c3606ff00c5d7d93083a59b MD5 · raw file

  1. import pandas as pd
  2. from pandas.compat import PY2
  3. import pandas.util.testing as tm
  4. import os
  5. import io
  6. import numpy as np
  7. class TestSAS7BDAT(tm.TestCase):
  8. def setUp(self):
  9. self.dirpath = tm.get_data_path()
  10. self.data = []
  11. self.test_ix = [list(range(1, 16)), [16]]
  12. for j in 1, 2:
  13. fname = os.path.join(self.dirpath, "test_sas7bdat_%d.csv" % j)
  14. df = pd.read_csv(fname)
  15. epoch = pd.datetime(1960, 1, 1)
  16. t1 = pd.to_timedelta(df["Column4"], unit='d')
  17. df["Column4"] = epoch + t1
  18. t2 = pd.to_timedelta(df["Column12"], unit='d')
  19. df["Column12"] = epoch + t2
  20. for k in range(df.shape[1]):
  21. col = df.iloc[:, k]
  22. if col.dtype == np.int64:
  23. df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
  24. elif col.dtype == np.dtype('O'):
  25. if PY2:
  26. f = lambda x: (x.decode('utf-8') if
  27. isinstance(x, str) else x)
  28. df.iloc[:, k] = df.iloc[:, k].apply(f)
  29. self.data.append(df)
  30. def test_from_file(self):
  31. for j in 0, 1:
  32. df0 = self.data[j]
  33. for k in self.test_ix[j]:
  34. fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
  35. df = pd.read_sas(fname, encoding='utf-8')
  36. tm.assert_frame_equal(df, df0)
  37. def test_from_buffer(self):
  38. for j in 0, 1:
  39. df0 = self.data[j]
  40. for k in self.test_ix[j]:
  41. fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
  42. with open(fname, 'rb') as f:
  43. byts = f.read()
  44. buf = io.BytesIO(byts)
  45. df = pd.read_sas(buf, format="sas7bdat", encoding='utf-8')
  46. tm.assert_frame_equal(df, df0, check_exact=False)
  47. def test_from_iterator(self):
  48. for j in 0, 1:
  49. df0 = self.data[j]
  50. for k in self.test_ix[j]:
  51. fname = os.path.join(self.dirpath, "test%d.sas7bdat" % k)
  52. with open(fname, 'rb') as f:
  53. byts = f.read()
  54. buf = io.BytesIO(byts)
  55. rdr = pd.read_sas(buf, format="sas7bdat",
  56. iterator=True, encoding='utf-8')
  57. df = rdr.read(2)
  58. tm.assert_frame_equal(df, df0.iloc[0:2, :])
  59. df = rdr.read(3)
  60. tm.assert_frame_equal(df, df0.iloc[2:5, :])
  61. def test_encoding_options():
  62. dirpath = tm.get_data_path()
  63. fname = os.path.join(dirpath, "test1.sas7bdat")
  64. df1 = pd.read_sas(fname)
  65. df2 = pd.read_sas(fname, encoding='utf-8')
  66. for col in df1.columns:
  67. try:
  68. df1[col] = df1[col].str.decode('utf-8')
  69. except AttributeError:
  70. pass
  71. tm.assert_frame_equal(df1, df2)
  72. from pandas.io.sas.sas7bdat import SAS7BDATReader
  73. rdr = SAS7BDATReader(fname, convert_header_text=False)
  74. df3 = rdr.read()
  75. rdr.close()
  76. for x, y in zip(df1.columns, df3.columns):
  77. assert(x == y.decode())
  78. def test_productsales():
  79. dirpath = tm.get_data_path()
  80. fname = os.path.join(dirpath, "productsales.sas7bdat")
  81. df = pd.read_sas(fname, encoding='utf-8')
  82. fname = os.path.join(dirpath, "productsales.csv")
  83. df0 = pd.read_csv(fname)
  84. vn = ["ACTUAL", "PREDICT", "QUARTER", "YEAR", "MONTH"]
  85. df0[vn] = df0[vn].astype(np.float64)
  86. tm.assert_frame_equal(df, df0)
  87. def test_12659():
  88. dirpath = tm.get_data_path()
  89. fname = os.path.join(dirpath, "test_12659.sas7bdat")
  90. df = pd.read_sas(fname)
  91. fname = os.path.join(dirpath, "test_12659.csv")
  92. df0 = pd.read_csv(fname)
  93. df0 = df0.astype(np.float64)
  94. tm.assert_frame_equal(df, df0)
  95. def test_airline():
  96. dirpath = tm.get_data_path()
  97. fname = os.path.join(dirpath, "airline.sas7bdat")
  98. df = pd.read_sas(fname)
  99. fname = os.path.join(dirpath, "airline.csv")
  100. df0 = pd.read_csv(fname)
  101. df0 = df0.astype(np.float64)
  102. tm.assert_frame_equal(df, df0, check_exact=False)