PageRenderTime 373ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/itmi_vcfqc_optimized/pymodules/python2.7/lib/python/statsmodels-0.5.0-py2.7-linux-x86_64.egg/statsmodels/iolib/tests/test_foreign.py

https://gitlab.com/pooja043/Globus_Docker_4
Python | 199 lines | 175 code | 13 blank | 11 comment | 1 complexity | 06c0a9a55ad0425f331827699bfb0b4b MD5 | raw file
  1. """
  2. Tests for iolib/foreign.py
  3. """
  4. import os
  5. import warnings
  6. from datetime import datetime
  7. from numpy.testing import *
  8. import numpy as np
  9. from pandas import DataFrame, isnull
  10. import pandas.util.testing as ptesting
  11. from statsmodels.compatnp.py3k import BytesIO, asbytes
  12. import statsmodels.api as sm
  13. from statsmodels.iolib.foreign import (StataWriter, genfromdta,
  14. _datetime_to_stata_elapsed, _stata_elapsed_date_to_datetime)
  15. from statsmodels.datasets import macrodata
  16. import pandas
  17. pandas_old = int(pandas.__version__.split('.')[1]) < 9
  18. # Test precisions
  19. DECIMAL_4 = 4
  20. DECIMAL_3 = 3
  21. curdir = os.path.dirname(os.path.abspath(__file__))
  22. def test_genfromdta():
  23. #Test genfromdta vs. results/macrodta.npy created with genfromtxt.
  24. #NOTE: Stata handles data very oddly. Round tripping from csv to dta
  25. # to ndarray 2710.349 (csv) -> 2510.2491 (stata) -> 2710.34912109375
  26. # (dta/ndarray)
  27. #res2 = np.load(curdir+'/results/macrodata.npy')
  28. #res2 = res2.view((float,len(res2[0])))
  29. from results.macrodata import macrodata_result as res2
  30. res1 = genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta')
  31. #res1 = res1.view((float,len(res1[0])))
  32. assert_array_equal(res1 == res2, True)
  33. def test_genfromdta_pandas():
  34. from pandas.util.testing import assert_frame_equal
  35. dta = macrodata.load_pandas().data
  36. curdir = os.path.dirname(os.path.abspath(__file__))
  37. res1 = sm.iolib.genfromdta(curdir+'/../../datasets/macrodata/macrodata.dta',
  38. pandas=True)
  39. res1 = res1.astype(float)
  40. assert_frame_equal(res1, dta)
  41. def test_stata_writer_structured():
  42. buf = BytesIO()
  43. dta = macrodata.load().data
  44. dtype = dta.dtype
  45. dta = dta.astype(np.dtype([('year', int),
  46. ('quarter', int)] + dtype.descr[2:]))
  47. writer = StataWriter(buf, dta)
  48. writer.write_file()
  49. buf.seek(0)
  50. dta2 = genfromdta(buf)
  51. assert_array_equal(dta, dta2)
  52. def test_stata_writer_array():
  53. buf = BytesIO()
  54. dta = macrodata.load().data
  55. dta = DataFrame.from_records(dta)
  56. dta.columns = ["v%d" % i for i in range(1,15)]
  57. writer = StataWriter(buf, dta.values)
  58. writer.write_file()
  59. buf.seek(0)
  60. dta2 = genfromdta(buf)
  61. dta = dta.to_records(index=False)
  62. assert_array_equal(dta, dta2)
  63. def test_missing_roundtrip():
  64. buf = BytesIO()
  65. dta = np.array([(np.nan, np.inf, "")],
  66. dtype=[("double_miss", float), ("float_miss", np.float32),
  67. ("string_miss", "a1")])
  68. writer = StataWriter(buf, dta)
  69. writer.write_file()
  70. buf.seek(0)
  71. dta = genfromdta(buf, missing_flt=np.nan)
  72. assert_(isnull(dta[0][0]))
  73. assert_(isnull(dta[0][1]))
  74. assert_(dta[0][2] == asbytes(""))
  75. dta = genfromdta(os.path.join(curdir, "results/data_missing.dta"),
  76. missing_flt=-999)
  77. assert_(np.all([dta[0][i] == -999 for i in range(5)]))
  78. def test_stata_writer_pandas():
  79. buf = BytesIO()
  80. dta = macrodata.load().data
  81. dtype = dta.dtype
  82. #as of 0.9.0 pandas only supports i8 and f8
  83. dta = dta.astype(np.dtype([('year', 'i8'),
  84. ('quarter', 'i8')] + dtype.descr[2:]))
  85. dta4 = dta.astype(np.dtype([('year', 'i4'),
  86. ('quarter', 'i4')] + dtype.descr[2:]))
  87. dta = DataFrame.from_records(dta)
  88. dta4 = DataFrame.from_records(dta4)
  89. # dta is int64 'i8' given to Stata writer
  90. writer = StataWriter(buf, dta)
  91. writer.write_file()
  92. buf.seek(0)
  93. dta2 = genfromdta(buf)
  94. dta5 = DataFrame.from_records(dta2)
  95. # dta2 is int32 'i4' returned from Stata reader
  96. if dta5.dtypes[1] is np.dtype('int64'):
  97. ptesting.assert_frame_equal(dta.reset_index(), dta5)
  98. else:
  99. # don't check index because it has different size, int32 versus int64
  100. ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]])
  101. def test_stata_writer_unicode():
  102. # make sure to test with characters outside the latin-1 encoding
  103. pass
  104. @dec.skipif(pandas_old)
  105. def test_genfromdta_datetime():
  106. results = [(datetime(2006, 11, 19, 23, 13, 20), 1479596223000,
  107. datetime(2010, 1, 20), datetime(2010, 1, 8), datetime(2010, 1, 1),
  108. datetime(1974, 7, 1), datetime(2010, 1, 1), datetime(2010, 1, 1)),
  109. (datetime(1959, 12, 31, 20, 3, 20), -1479590, datetime(1953, 10, 2),
  110. datetime(1948, 6, 10), datetime(1955, 1, 1), datetime(1955, 7, 1),
  111. datetime(1955, 1, 1), datetime(2, 1, 1))]
  112. with warnings.catch_warnings(record=True) as w:
  113. dta = genfromdta(os.path.join(curdir, "results/time_series_examples.dta"))
  114. assert_(len(w) == 1) # should get a warning for that format.
  115. assert_array_equal(dta[0].tolist(), results[0])
  116. assert_array_equal(dta[1].tolist(), results[1])
  117. with warnings.catch_warnings(record=True):
  118. dta = genfromdta(os.path.join(curdir, "results/time_series_examples.dta"),
  119. pandas=True)
  120. assert_array_equal(dta.irow(0).tolist(), results[0])
  121. assert_array_equal(dta.irow(1).tolist(), results[1])
  122. def test_date_converters():
  123. ms = [-1479597200000, -1e6, -1e5, -100, 1e5, 1e6, 1479597200000]
  124. days = [-1e5, -1200, -800, -365, -50, 0, 50, 365, 800, 1200, 1e5]
  125. weeks = [-1e4, -1e2, -53, -52, -51, 0, 51, 52, 53, 1e2, 1e4]
  126. months = [-1e4, -1e3, -100, -13, -12, -11, 0, 11, 12, 13, 100, 1e3, 1e4]
  127. quarter = [-100, -50, -5, -4, -3, 0, 3, 4, 5, 50, 100]
  128. half = [-50, 40, 30, 10, 3, 2, 1, 0, 1, 2, 3, 10, 30, 40, 50]
  129. year = [1, 50, 500, 1000, 1500, 1975, 2075]
  130. for i in ms:
  131. assert_equal(_datetime_to_stata_elapsed(
  132. _stata_elapsed_date_to_datetime(i, "tc"), "tc"), i)
  133. for i in days:
  134. assert_equal(_datetime_to_stata_elapsed(
  135. _stata_elapsed_date_to_datetime(i, "td"), "td"), i)
  136. for i in weeks:
  137. assert_equal(_datetime_to_stata_elapsed(
  138. _stata_elapsed_date_to_datetime(i, "tw"), "tw"), i)
  139. for i in months:
  140. assert_equal(_datetime_to_stata_elapsed(
  141. _stata_elapsed_date_to_datetime(i, "tm"), "tm"), i)
  142. for i in quarter:
  143. assert_equal(_datetime_to_stata_elapsed(
  144. _stata_elapsed_date_to_datetime(i, "tq"), "tq"), i)
  145. for i in half:
  146. assert_equal(_datetime_to_stata_elapsed(
  147. _stata_elapsed_date_to_datetime(i, "th"), "th"), i)
  148. for i in year:
  149. assert_equal(_datetime_to_stata_elapsed(
  150. _stata_elapsed_date_to_datetime(i, "ty"), "ty"), i)
  151. @dec.skipif(pandas_old)
  152. def test_datetime_roundtrip():
  153. dta = np.array([(1, datetime(2010, 1, 1), 2),
  154. (2, datetime(2010, 2, 1), 3),
  155. (4, datetime(2010, 3, 1), 5)],
  156. dtype=[('var1', float), ('var2', object), ('var3', float)])
  157. buf = BytesIO()
  158. writer = StataWriter(buf, dta, {"var2" : "tm"})
  159. writer.write_file()
  160. buf.seek(0)
  161. dta2 = genfromdta(buf)
  162. assert_equal(dta, dta2)
  163. dta = DataFrame.from_records(dta)
  164. buf = BytesIO()
  165. writer = StataWriter(buf, dta, {"var2" : "tm"})
  166. writer.write_file()
  167. buf.seek(0)
  168. dta2 = genfromdta(buf, pandas=True)
  169. ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1))
  170. if __name__ == "__main__":
  171. import nose
  172. nose.runmodule(argv=[__file__,'-vvs','-x','--pdb'],
  173. exit=False)