rpy2_w32_fix /rpy/robjects/pandas2ri.py

Language Python Lines 166
MD5 Hash 70a6c53486b819d3093e6a21d150f5b9 Estimated Cost $2,993 (why?)
Repository https://bitbucket.org/breisfeld/rpy2_w32_fix View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import rpy2.robjects as ro
import rpy2.robjects.conversion as conversion
import rpy2.rinterface as rinterface
from rpy2.rinterface import SexpVector, INTSXP

from pandas.core.frame import DataFrame as PandasDataFrame
from pandas.core.series import Series as PandasSeries
from pandas.core.index import Index as PandasIndex
import pandas
from numpy import recarray
import numpy

from collections import OrderedDict
from rpy2.robjects.vectors import (DataFrame,
                                   Vector,
                                   ListVector,
                                   StrVector,
                                   IntVector,
                                   POSIXct)
from rpy2.rinterface import (IntSexpVector,
                             ListSexpVector)
# Conversion that was in place before activate() was called; it is None
# whenever this module's conversion is not activated (see activate() and
# deactivate() below, which set and reset it).
original_converter = None 

# pandas requires numpy. The numpy conversion rules are added to the
# pandas conversion when the function activate() below is called.
import rpy2.robjects.numpy2ri as numpy2ri


# R's `ISOdatetime`, looked up in the R base environment; used in this
# module to build date-time values from pandas time series.
ISOdatetime = rinterface.baseenv['ISOdatetime']

# Converter that collects the conversion rules defined in this module.
converter = conversion.Converter('original pandas conversion')
# Short aliases for the converter's four dispatch objects; the functions
# below register themselves on them with `@<alias>.register(<type>)`.
py2ri = converter.py2ri
py2ro = converter.py2ro
ri2py = converter.ri2py
ri2ro = converter.ri2ro

@py2ri.register(PandasDataFrame)
def py2ri_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R data frame.

    Columns are converted one at a time, in the order pandas yields them:
    object-dtype columns become R character vectors, any other column goes
    through the active ``conversion.py2ri``.
    """
    def _convert_column(column):
        # dtype kind 'O' means generic Python objects: store them as strings
        if column.dtype.kind == 'O':
            return StrVector(column)
        return conversion.py2ri(column)

    columns = OrderedDict(
        (name, _convert_column(column)) for name, column in obj.iteritems()
    )
    return DataFrame(columns)


@py2ri.register(PandasIndex)
def py2ri_pandasindex(obj):
    """Convert a pandas Index into an R vector.

    An object-dtype index becomes an R character vector; every other dtype
    is delegated to numpy2ri.
    """
    if obj.dtype.kind != 'O':
        # pandas2ri builds on the conversion rules defined by numpy2ri, so
        # anything that is not object-typed is handed over to it instead of
        # being re-implemented here.
        return numpy2ri.numpy2ri(obj)
    return StrVector(obj)

@py2ri.register(PandasSeries)
def py2ri_pandasseries(obj):
    """Convert a pandas Series into an R vector carrying the index as names.

    A ``'<M8[ns]'`` (datetime) series is rebuilt on the R side with
    ``ISOdatetime`` from its year/month/day/hour/minute/second components
    and wrapped as ``POSIXct``; any other series has its underlying numpy
    array converted by numpy2ri.
    """
    is_time_series = obj.dtype == '<M8[ns]'
    if is_time_series:
        # one integer vector per date-time component, in the positional
        # order expected by the ISOdatetime call below
        components = ('year', 'month', 'day', 'hour', 'minute', 'second')
        parts = [IntVector([getattr(stamp, field) for stamp in obj])
                 for field in components]
        #FIXME: can the POSIXct be created from the POSIXct constructor ?
        # (is '<M8[ns]' mapping to Python datetime.datetime ?)
        res = POSIXct(ISOdatetime(*parts))
    else:
        # plain values: convert the backing numpy array
        res = numpy2ri.numpy2ri(obj.values)

    # the pandas "index" plays the role of "names" in R
    if obj.ndim == 1:
        labels = StrVector(tuple(map(str, obj.index)))
        res.do_slot_assign('names', labels)
    else:
        dimnames = SexpVector(conversion.py2ri(obj.index))
        res.do_slot_assign('dimnames', dimnames)
    return res

@ri2py.register(SexpVector)
def ri2py_vector(obj):
    """Convert a generic low-level R vector by delegating to numpy2ri."""
    return numpy2ri.ri2py(obj)
    
@ri2py.register(IntSexpVector)
def ri2py_intvector(obj):
    """Convert a low-level R integer vector to a Python object.

    An R factor (``'factor'`` in ``obj.rclass``) becomes a
    ``pandas.Categorical`` whose categories are the factor's levels and
    which is ordered when ``'ordered'`` is in ``obj.rclass``. Any other
    integer vector is delegated to numpy2ri.

    Missing values in a factor are preserved: they end up as code -1,
    which is how ``pandas.Categorical`` marks a missing entry.
    """
    if 'factor' not in obj.rclass:
        return numpy2ri.ri2py(obj)

    # R factor codes are 1-based, while pandas codes are 0-based with -1
    # reserved for "missing". A missing factor entry shows up here as a
    # non-positive integer (R's NA sentinel), so a plain "minus one" would
    # produce a code pandas rejects. Clamp non-positive codes to 0 first so
    # that the shift maps them to -1 (and cannot overflow).
    r_codes = numpy.asarray(obj)
    codes = numpy.where(r_codes > 0, r_codes, 0) - 1
    return pandas.Categorical.from_codes(codes,
                                         categories=obj.do_slot('levels'),
                                         ordered='ordered' in obj.rclass)

@ri2py.register(ListSexpVector)
def ri2py_listvector(obj):
    """Convert a low-level R list.

    A list whose R class includes ``'data.frame'`` is converted with the
    handler registered on ``ri2py`` for ``DataFrame``; any other list is
    delegated to numpy2ri.
    """
    if 'data.frame' not in obj.rclass:
        return numpy2ri.ri2py(obj)
    # fetch the handler through the registry rather than calling it by name
    to_dataframe = ri2py.registry[DataFrame]
    return to_dataframe(obj)

@ri2py.register(DataFrame)
def ri2py_dataframe(obj):
    """Convert an R data frame into a pandas DataFrame.

    The content is first converted by numpy2ri, then handed to
    ``pandas.DataFrame.from_records``. The R ``row.names`` slot, when it
    can be looked up, is used as the index; otherwise no index is passed.
    """
    # let the numpy converter do the heavy lifting
    records = numpy2ri.ri2py(obj)
    try:
        row_index = numpy2ri.ri2py(obj.do_slot('row.names'))
    except LookupError:
        # no such slot: from_records is given index=None
        row_index = None
    return PandasDataFrame.from_records(records, index=row_index)

def activate():
    """Make the pandas conversion the active rpy2 conversion.

    A snapshot of the currently active converter is taken so that
    deactivate() can restore it. A new converter is then created while
    numpy2ri is temporarily activated, the rules registered in this module
    are registered on top of it, and it is installed with
    ``conversion.set_conversion``.

    Calling this function while the module is already activated does
    nothing. The module is only marked as activated once the new
    conversion has been installed, so a failure part-way through does not
    leave it in a state where further calls are silently ignored.
    """
    global original_converter
    # If module is already activated, there is nothing to do
    if original_converter is not None:
        return

    # Kept in a local until activation has fully succeeded.
    snapshot = conversion.Converter('snapshot before pandas conversion',
                                    template=conversion.converter)

    # numpy2ri is only switched on long enough to use the resulting active
    # converter as a template; it is switched off again even if creating
    # the new converter fails.
    numpy2ri.activate()
    try:
        # NOTE: the name string duplicates the snapshot's name above; it is
        # kept unchanged here.
        new_converter = conversion.Converter('snapshot before pandas conversion',
                                             template=conversion.converter)
    finally:
        numpy2ri.deactivate()

    # Register this module's rules on the new converter, one dispatch
    # object at a time, in the same order as before. The catch-all entry
    # for `object` is skipped so the template's default handler is kept.
    registrations = ((py2ri, new_converter.py2ri),
                     (ri2ro, new_converter.ri2ro),
                     (py2ro, new_converter.py2ro),
                     (ri2py, new_converter.ri2py))
    for source, target in registrations:
        for k, v in source.registry.items():
            if k is object:
                continue
            target.register(k, v)

    conversion.set_conversion(new_converter)
    # Everything succeeded: record the snapshot, which marks the module as
    # activated.
    original_converter = snapshot

def deactivate():
    """Restore the conversion that was active before activate().

    Does nothing when the module was never activated or has already been
    deactivated (``original_converter`` is None in both cases).
    """
    global original_converter

    if original_converter is not None:
        conversion.set_conversion(original_converter)
        # forget the snapshot: the module is inactive again
        original_converter = None
Back to Top