PageRenderTime 37ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/rpy/robjects/tests/testPandasConversions.py

https://bitbucket.org/breisfeld/rpy2_w32_fix
Python | 139 lines | 119 code | 16 blank | 4 comment | 9 complexity | 39073e73a8210db9c06e251ecf1b7e43 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
  1. import unittest
  2. import rpy2.robjects as robjects
  3. from rpy2.robjects import conversion
  4. import rpy2.rinterface as rinterface
  5. from collections import OrderedDict
  6. from datetime import datetime
  7. has_pandas = True
  8. try:
  9. import pandas
  10. import numpy
  11. has_pandas = True
  12. except:
  13. has_pandas = False
  14. if has_pandas:
  15. import rpy2.robjects.pandas2ri as rpyp
  16. @unittest.skipUnless(has_pandas, "The Python package is not installed: functionalities associated with it cannot be tested.")
  17. class PandasConversionsTestCase(unittest.TestCase):
  18. def testActivate(self):
  19. #FIXME: is the following still making sense ?
  20. self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
  21. l = len(robjects.conversion.py2ri.registry)
  22. k = set(robjects.conversion.py2ri.registry.keys())
  23. rpyp.activate()
  24. self.assertTrue(len(conversion.py2ri.registry) > l)
  25. rpyp.deactivate()
  26. self.assertEqual(l, len(conversion.py2ri.registry))
  27. self.assertEqual(k, set(conversion.py2ri.registry.keys()))
  28. def testActivateTwice(self):
  29. #FIXME: is the following still making sense ?
  30. self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
  31. l = len(robjects.conversion.py2ri.registry)
  32. k = set(robjects.conversion.py2ri.registry.keys())
  33. rpyp.activate()
  34. rpyp.deactivate()
  35. rpyp.activate()
  36. self.assertTrue(len(conversion.py2ri.registry) > l)
  37. rpyp.deactivate()
  38. self.assertEqual(l, len(conversion.py2ri.registry))
  39. self.assertEqual(k, set(conversion.py2ri.registry.keys()))
  40. def testDataFrame(self):
  41. l = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
  42. ('i', numpy.array([1, 2, 3], dtype="i")),
  43. ('f', numpy.array([1, 2, 3], dtype="f")),
  44. ('s', numpy.array(["a", "b", "c"], dtype="S")),
  45. ('u', numpy.array([u"a", u"b", u"c"], dtype="U")),
  46. ('dates', [datetime(2012, 5, 2),
  47. datetime(2012, 6, 3),
  48. datetime(2012, 7, 1)]))
  49. od = OrderedDict(l)
  50. pd_df = pandas.core.frame.DataFrame(od)
  51. rpyp.activate()
  52. rp_df = robjects.conversion.py2ri(pd_df)
  53. rpyp.deactivate()
  54. self.assertEqual(pd_df.shape[0], rp_df.nrow)
  55. self.assertEqual(pd_df.shape[1], rp_df.ncol)
  56. def testSeries(self):
  57. Series = pandas.core.series.Series
  58. s = Series(numpy.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
  59. rpyp.activate()
  60. rp_s = robjects.conversion.py2ri(s)
  61. rpyp.deactivate()
  62. self.assertEqual(rinterface.FloatSexpVector, type(rp_s))
  63. def testSeries_issue264(self):
  64. Series = pandas.core.series.Series
  65. s = Series(('a', 'b', 'c', 'd', 'e'),
  66. index=pandas.Int64Index([0,1,2,3,4]))
  67. rpyp.activate()
  68. rp_s = robjects.conversion.py2ri(s)
  69. rpyp.deactivate()
  70. # segfault before the fix
  71. str(rp_s)
  72. self.assertEqual(rinterface.ListSexpVector, type(rp_s))
  73. def testCategorical(self):
  74. factor = robjects.vectors.FactorVector(('a', 'b', 'a'))
  75. rpyp.activate()
  76. rp_c = robjects.conversion.ri2py(factor)
  77. rpyp.deactivate()
  78. self.assertEqual(pandas.Categorical, type(rp_c))
  79. def testRepr(self):
  80. # this should go to testVector, with other tests for repr()
  81. l = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
  82. ('i', numpy.array([1, 2, 3], dtype="i")),
  83. ('f', numpy.array([1, 2, 3], dtype="f")),
  84. ('s', numpy.array(["a", "b", "c"], dtype="S")),
  85. ('u', numpy.array([u"a", u"b", u"c"], dtype="U")))
  86. od = OrderedDict(l)
  87. pd_df = pandas.core.frame.DataFrame(od)
  88. rpyp.activate()
  89. rp_df = robjects.conversion.py2ri(pd_df)
  90. rpyp.deactivate()
  91. s = repr(rp_df) # used to fail with a TypeError
  92. s = s.split('\n')
  93. self.assertEqual('[Array, Array, Array, FactorV..., FactorV...]', s[1].strip())
  94. def testRi2pandas(self):
  95. rdataf = robjects.r('data.frame(a=1:2, b=I(c("a", "b")), c=c("a", "b"))')
  96. rpyp.activate()
  97. pandas_df = robjects.conversion.ri2py(rdataf)
  98. rpyp.deactivate()
  99. self.assertIsInstance(pandas_df, pandas.DataFrame)
  100. self.assertEquals(('a', 'b', 'c'), tuple(pandas_df.keys()))
  101. self.assertEquals(pandas_df['a'].dtype, numpy.dtype('int32'))
  102. self.assertEquals(pandas_df['b'].dtype, numpy.dtype('O'))
  103. self.assertEquals(pandas_df['c'].dtype, numpy.dtype('O'))
  104. def testRi2pandas_issue207(self):
  105. d = robjects.DataFrame({'x': 1})
  106. rpyp.activate()
  107. try:
  108. ok = True
  109. robjects.globalenv['d'] = d
  110. except ValueError:
  111. ok = False
  112. finally:
  113. rpyp.deactivate()
  114. if 'd' in robjects.globalenv:
  115. del(robjects.globalenv['d'])
  116. self.assertTrue(ok)
  117. def suite():
  118. if has_pandas:
  119. return unittest.TestLoader().loadTestsFromTestCase(PandasConversionsTestCase)
  120. else:
  121. return unittest.TestLoader().loadTestsFromTestCase(MissingPandasDummyTestCase)
  122. if __name__ == '__main__':
  123. unittest.main(defaultTest='suite')