PageRenderTime 63ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/rpy/robjects/pandas2ri.py

https://bitbucket.org/breisfeld/rpy2_w32_fix
Python | 165 lines | 123 code | 25 blank | 17 comment | 32 complexity | 70a6c53486b819d3093e6a21d150f5b9 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
  1. import rpy2.robjects as ro
  2. import rpy2.robjects.conversion as conversion
  3. import rpy2.rinterface as rinterface
  4. from rpy2.rinterface import SexpVector, INTSXP
  5. from pandas.core.frame import DataFrame as PandasDataFrame
  6. from pandas.core.series import Series as PandasSeries
  7. from pandas.core.index import Index as PandasIndex
  8. import pandas
  9. from numpy import recarray
  10. import numpy
  11. from collections import OrderedDict
  12. from rpy2.robjects.vectors import (DataFrame,
  13. Vector,
  14. ListVector,
  15. StrVector,
  16. IntVector,
  17. POSIXct)
  18. from rpy2.rinterface import (IntSexpVector,
  19. ListSexpVector)
  # Snapshot of the conversion rules taken by activate(); stays None for as
  # long as the pandas conversion is not active.
  original_converter = None

  # pandas requires numpy, so this module builds on the numpy conversion
  # rules; they are switched on inside activate() below.
  import rpy2.robjects.numpy2ri as numpy2ri

  # R function ISOdatetime, looked up once in the base environment.
  ISOdatetime = rinterface.baseenv['ISOdatetime']

  # Converter collecting the pandas-specific rules, together with
  # module-level shortcuts to its four dispatch functions.
  converter = conversion.Converter('original pandas conversion')
  py2ri, py2ro, ri2py, ri2ro = (converter.py2ri, converter.py2ro,
                                converter.ri2py, converter.ri2ro)
  @py2ri.register(PandasDataFrame)
  def py2ri_pandasdataframe(obj):
      """Convert a pandas DataFrame into an rpy2 DataFrame.

      Columns are visited in their existing order. A column whose dtype
      kind is 'O' (Python objects) is wrapped in a StrVector; every other
      column is handed to ``conversion.py2ri``.

      :param obj: the pandas DataFrame to convert
      :return: a DataFrame built from the ordered name -> vector mapping
      """
      od = OrderedDict()
      # DataFrame.items() yields the same (name, column) pairs as the
      # iteritems() method, which recent pandas releases no longer provide.
      for name, values in obj.items():
          if values.dtype.kind == 'O':
              od[name] = StrVector(values)
          else:
              od[name] = conversion.py2ri(values)
      return DataFrame(od)
  @py2ri.register(PandasIndex)
  def py2ri_pandasindex(obj):
      """Convert a pandas Index into an R vector.

      :param obj: the pandas Index to convert
      :return: a StrVector when the index has dtype kind 'O', otherwise
          whatever ``numpy2ri.numpy2ri`` returns for it
      """
      if obj.dtype.kind != 'O':
          # pandas2ri builds on the conversion rules defined by numpy2ri,
          # so every non-object index is deferred to that module instead of
          # duplicating its logic here.
          return numpy2ri.numpy2ri(obj)
      return StrVector(obj)
  @py2ri.register(PandasSeries)
  def py2ri_pandasseries(obj):
      """Convert a pandas Series into an R vector carrying its index.

      A series of dtype '<M8[ns]' is rebuilt on the R side with ISOdatetime
      from its year/month/day/hour/minute/second components and wrapped in
      POSIXct. Any other series is converted from its underlying numpy
      array.

      :param obj: the pandas Series to convert
      :return: the converted R vector, with 'names' (or 'dimnames') set
          from the index of ``obj``
      """
      is_time_series = obj.dtype == '<M8[ns]'
      if not is_time_series:
          # converted as a numpy array
          res = numpy2ri.numpy2ri(obj.values)
      else:
          # One integer vector per date/time component, in the positional
          # order in which they are passed to ISOdatetime.
          fields = ('year', 'month', 'day', 'hour', 'minute', 'second')
          parts = [IntVector([getattr(stamp, field) for stamp in obj])
                   for field in fields]
          # FIXME: can the POSIXct be created from the POSIXct constructor ?
          # (is '<M8[ns]' mapping to Python datetime.datetime ?)
          res = POSIXct(ISOdatetime(*parts))

      # "index" is equivalent to "names" in R
      if obj.ndim != 1:
          res.do_slot_assign('dimnames',
                             SexpVector(conversion.py2ri(obj.index)))
      else:
          labels = tuple(str(label) for label in obj.index)
          res.do_slot_assign('names', StrVector(labels))
      return res
  @ri2py.register(SexpVector)
  def ri2py_vector(obj):
      """Convert a generic R vector by delegating to ``numpy2ri.ri2py``.

      :param obj: the R vector to convert
      :return: the result of the numpy conversion
      """
      return numpy2ri.ri2py(obj)
  @ri2py.register(IntSexpVector)
  def ri2py_intvector(obj):
      """Convert an R integer vector, mapping factors to pandas Categorical.

      A vector whose R class includes 'factor' becomes a
      ``pandas.Categorical`` whose categories are the factor levels; it is
      ordered when the R class also includes 'ordered'. Any other integer
      vector is delegated to ``numpy2ri.ri2py``.

      :param obj: the R integer vector to convert
      :return: a pandas.Categorical for factors, otherwise the result of
          the numpy conversion
      """
      if 'factor' not in obj.rclass:
          return numpy2ri.ri2py(obj)

      codes = numpy.asarray(obj)
      # R level codes are 1-based while pandas codes are 0-based, with -1
      # reserved for "missing". A missing factor entry reaches us as a
      # non-positive integer (R's NA_integer_), and subtracting 1 from it
      # directly would yield a code that from_codes() rejects. Clamp such
      # entries to 0 first so the shift leaves them at exactly -1.
      codes = numpy.where(codes > 0, codes, 0) - 1
      return pandas.Categorical.from_codes(codes,
                                           categories=obj.do_slot('levels'),
                                           ordered='ordered' in obj.rclass)
  @ri2py.register(ListSexpVector)
  def ri2py_listvector(obj):
      """Convert an R list, treating data.frame objects specially.

      :param obj: the R list to convert
      :return: the result of the converter registered for DataFrame when
          the R class includes 'data.frame', otherwise the result of
          ``numpy2ri.ri2py``
      """
      if 'data.frame' not in obj.rclass:
          return numpy2ri.ri2py(obj)
      # Reuse whichever converter is registered for DataFrame objects.
      to_dataframe = ri2py.registry[DataFrame]
      return to_dataframe(obj)
  @ri2py.register(DataFrame)
  def ri2py_dataframe(obj):
      """Convert an R data.frame into a pandas DataFrame.

      The columns go through the numpy converter first; the 'row.names'
      slot, when it can be looked up, becomes the index.

      :param obj: the R data.frame to convert
      :return: a pandas DataFrame built with ``from_records``
      """
      # use the numpy converter
      records = numpy2ri.ri2py(obj)
      try:
          row_names = numpy2ri.ri2py(obj.do_slot('row.names'))
      except LookupError:
          # No usable 'row.names' slot: let pandas pick its default index.
          row_names = None
      return PandasDataFrame.from_records(records, index=row_names)
  def activate():
      """Install the pandas conversion rules as the active conversion.

      The current conversion is snapshotted into ``original_converter`` so
      that deactivate() can restore it. A new converter is then built from
      the conversion with numpy2ri temporarily activated, the rules
      registered in this module are added on top, and the result is set as
      the active conversion. Calling this while already activated does
      nothing.
      """
      global original_converter

      # If module is already activated, there is nothing to do
      if original_converter is not None:
          return

      original_converter = conversion.Converter(
          'snapshot before pandas conversion',
          template=conversion.converter)

      # Capture the conversion rules as they are while numpy2ri is active.
      # NOTE(review): this converter carries the same name as the snapshot
      # above - looks like a copy-paste leftover; confirm before renaming.
      numpy2ri.activate()
      new_converter = conversion.Converter(
          'snapshot before pandas conversion',
          template=conversion.converter)
      numpy2ri.deactivate()

      # Copy every rule registered in this module onto the new converter,
      # leaving out the catch-all entry registered for `object`.
      dispatch_pairs = ((py2ri, new_converter.py2ri),
                        (ri2ro, new_converter.ri2ro),
                        (py2ro, new_converter.py2ro),
                        (ri2py, new_converter.ri2py))
      for source, target in dispatch_pairs:
          for cls, func in source.registry.items():
              if cls is not object:
                  target.register(cls, func)

      conversion.set_conversion(new_converter)
  def deactivate():
      """Restore the conversion that was active before activate().

      Does nothing when the module has never been activated or has already
      been deactivated.
      """
      global original_converter

      if original_converter is not None:
          conversion.set_conversion(original_converter)
          # Mark the module as inactive so activate() can run again.
          original_converter = None