rpy2_w32_fix /rpy/robjects/tests/testPandasConversions.py

Language Python Lines 140
MD5 Hash 39073e73a8210db9c06e251ecf1b7e43 Estimated Cost $2,890 (why?)
Repository https://bitbucket.org/breisfeld/rpy2_w32_fix View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import unittest
import rpy2.robjects as robjects
from rpy2.robjects import conversion
import rpy2.rinterface as rinterface

from collections import OrderedDict
from datetime import datetime

# pandas and numpy are optional dependencies of this test module: record
# whether they could be imported so that the test case below can be skipped
# rather than fail when they are missing.
try:
    import pandas
    import numpy
except ImportError:
    # Only a failed import means "not installed". Anything else
    # (KeyboardInterrupt, SystemExit, an error raised by a broken install)
    # is deliberately left to propagate instead of being reported as a skip.
    has_pandas = False
else:
    has_pandas = True

if has_pandas:
    # The pandas converter is only imported when pandas itself could be
    # imported; the tests below refer to it as `rpyp`.
    import rpy2.robjects.pandas2ri as rpyp

@unittest.skipUnless(
    has_pandas,
    "The Python package pandas is not installed: "
    "functionalities associated with it cannot be tested.")
class PandasConversionsTestCase(unittest.TestCase):
    """Tests for the conversions between pandas and R objects (pandas2ri).

    Every test that activates the pandas conversion deactivates it again in
    a ``finally`` clause, so that a failing conversion or assertion cannot
    leave the conversion active for the tests that run afterwards.
    """

    def testActivate(self):
        """activate() grows the py2ri registry and deactivate() restores it."""
        #FIXME: is the following still making sense ?
        self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
        n_before = len(robjects.conversion.py2ri.registry)
        keys_before = set(robjects.conversion.py2ri.registry.keys())
        rpyp.activate()
        try:
            self.assertGreater(len(conversion.py2ri.registry), n_before)
        finally:
            rpyp.deactivate()
        # After deactivation the registry must be back to its initial state.
        self.assertEqual(n_before, len(conversion.py2ri.registry))
        self.assertEqual(keys_before, set(conversion.py2ri.registry.keys()))

    def testActivateTwice(self):
        """A second activate()/deactivate() cycle behaves like the first."""
        #FIXME: is the following still making sense ?
        self.assertNotEqual(rpyp.py2ri, robjects.conversion.py2ri)
        n_before = len(robjects.conversion.py2ri.registry)
        keys_before = set(robjects.conversion.py2ri.registry.keys())
        rpyp.activate()
        rpyp.deactivate()
        rpyp.activate()
        try:
            self.assertGreater(len(conversion.py2ri.registry), n_before)
        finally:
            rpyp.deactivate()
        self.assertEqual(n_before, len(conversion.py2ri.registry))
        self.assertEqual(keys_before, set(conversion.py2ri.registry.keys()))

    def testDataFrame(self):
        """A pandas DataFrame converts to an R object of the same dimensions."""
        columns = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
                   ('i', numpy.array([1, 2, 3], dtype="i")),
                   ('f', numpy.array([1, 2, 3], dtype="f")),
                   ('s', numpy.array(["a", "b", "c"], dtype="S")),
                   ('u', numpy.array([u"a", u"b", u"c"], dtype="U")),
                   ('dates', [datetime(2012, 5, 2),
                              datetime(2012, 6, 3),
                              datetime(2012, 7, 1)]))
        # OrderedDict keeps the columns in the order listed above.
        pd_df = pandas.core.frame.DataFrame(OrderedDict(columns))
        rpyp.activate()
        try:
            rp_df = robjects.conversion.py2ri(pd_df)
        finally:
            rpyp.deactivate()
        self.assertEqual(pd_df.shape[0], rp_df.nrow)
        self.assertEqual(pd_df.shape[1], rp_df.ncol)

    def testSeries(self):
        """A Series of floats converts to a FloatSexpVector."""
        Series = pandas.core.series.Series
        s = Series(numpy.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
        rpyp.activate()
        try:
            rp_s = robjects.conversion.py2ri(s)
        finally:
            rpyp.deactivate()
        self.assertEqual(rinterface.FloatSexpVector, type(rp_s))

    def testSeries_issue264(self):
        """A Series of strings with an integer index converts to a list."""
        Series = pandas.core.series.Series
        s = Series(('a', 'b', 'c', 'd', 'e'),
                   index=pandas.Int64Index([0,1,2,3,4]))
        rpyp.activate()
        try:
            rp_s = robjects.conversion.py2ri(s)
        finally:
            rpyp.deactivate()
        # segfault before the fix
        str(rp_s)
        self.assertEqual(rinterface.ListSexpVector, type(rp_s))

    def testCategorical(self):
        """An R factor converts to a pandas Categorical."""
        factor = robjects.vectors.FactorVector(('a', 'b', 'a'))
        rpyp.activate()
        try:
            rp_c = robjects.conversion.ri2py(factor)
        finally:
            rpyp.deactivate()
        self.assertEqual(pandas.Categorical, type(rp_c))

    def testRepr(self):
        """repr() of a converted DataFrame lists the type of each column."""
        # this should go to testVector, with other tests for repr()
        columns = (('b', numpy.array([True, False, True], dtype=numpy.bool_)),
                   ('i', numpy.array([1, 2, 3], dtype="i")),
                   ('f', numpy.array([1, 2, 3], dtype="f")),
                   ('s', numpy.array(["a", "b", "c"], dtype="S")),
                   ('u', numpy.array([u"a", u"b", u"c"], dtype="U")))
        pd_df = pandas.core.frame.DataFrame(OrderedDict(columns))
        rpyp.activate()
        try:
            rp_df = robjects.conversion.py2ri(pd_df)
        finally:
            rpyp.deactivate()
        repr_lines = repr(rp_df).split('\n') # repr() used to fail with a TypeError
        # The second line of the repr is the one summarising the columns.
        self.assertEqual('[Array, Array, Array, FactorV..., FactorV...]',
                         repr_lines[1].strip())

    def testRi2pandas(self):
        """An R data.frame converts to a pandas DataFrame with expected dtypes."""
        rdataf = robjects.r('data.frame(a=1:2, b=I(c("a", "b")), c=c("a", "b"))')
        rpyp.activate()
        try:
            pandas_df = robjects.conversion.ri2py(rdataf)
        finally:
            rpyp.deactivate()
        self.assertIsInstance(pandas_df, pandas.DataFrame)
        # assertEqual rather than the deprecated alias assertEquals, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(('a', 'b', 'c'), tuple(pandas_df.keys()))
        self.assertEqual(pandas_df['a'].dtype, numpy.dtype('int32'))
        self.assertEqual(pandas_df['b'].dtype, numpy.dtype('O'))
        self.assertEqual(pandas_df['c'].dtype, numpy.dtype('O'))

    def testRi2pandas_issue207(self):
        """Binding an R DataFrame in globalenv must not raise ValueError."""
        d = robjects.DataFrame({'x': 1})
        rpyp.activate()
        try:
            robjects.globalenv['d'] = d
            ok = True
        except ValueError:
            ok = False
        finally:
            rpyp.deactivate()
            # Do not leave the test variable behind in R's global environment.
            if 'd' in robjects.globalenv:
                del robjects.globalenv['d']
        self.assertTrue(ok)

def suite():
    """Return a test suite holding every test of PandasConversionsTestCase.

    The test case is decorated with ``unittest.skipUnless(has_pandas, ...)``,
    so it can be loaded whether or not pandas is installed: when pandas is
    missing its tests are reported as skipped and no separate placeholder
    test case is needed.
    """
    return unittest.TestLoader().loadTestsFromTestCase(PandasConversionsTestCase)

# When this file is executed as a script, run the tests returned by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')
Back to Top