rpy2 is awesome.
But I was disconcerted to discover that passing a numpy array to an R
function just makes things blow up ("Nothing can be done for the
type...") -- who uses rpy without numpy? :-) So I fixed it.
Strategy:
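* most numpy arrays (bool, signed integer, float, complex, string) are mapped to R arrays (shape is preserved); arrays of unsigned integers or with a non-native byte order raise ValueError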
* a numpy object array is treated like a python list, and mapped to an R list
* a numpy record array is mapped to an R data frame
The patch does not introduce any hard dependency on numpy (the import is
optional, and everything else keeps working without it), is against
current trunk (revision 696), and includes complete tests.
Yes/no?
-- Nathaniel
Index: rpy/robjects/tests/__init__.py
===================================================================
--- rpy/robjects/tests/__init__.py (revision 696)
+++ rpy/robjects/tests/__init__.py (working copy)
@@ -8,6 +8,7 @@
import testRFunction
import testREnvironment
import testRobjects
+import testNumpyConversions
def suite():
suite_RObject = testRObject.suite()
@@ -18,6 +19,7 @@
suite_REnvironment = testREnvironment.suite()
suite_RFormula = testRFormula.suite()
suite_Robjects = testRobjects.suite()
+ suite_NumpyConversions = testNumpyConversions.suite()
alltests = unittest.TestSuite([suite_RObject,
suite_RVector,
suite_RArray,
@@ -25,7 +27,9 @@
suite_RFunction,
suite_REnvironment,
suite_RFormula,
- suite_Robjects ])
+ suite_Robjects,
+ suite_NumpyConversions,
+ ])
return alltests
def main():
Index: rpy/robjects/tests/testNumpyConversions.py
===================================================================
--- rpy/robjects/tests/testNumpyConversions.py (revision 0)
+++ rpy/robjects/tests/testNumpyConversions.py (revision 0)
@@ -0,0 +1,86 @@
+import unittest
+import rpy2.robjects as robjects
+r = robjects.r
+
+try:
+ import numpy
+except ImportError:
+ numpy = None
+
+class NumpyConversionsTestCase(unittest.TestCase):
+ def checkSimple(self, obj, mode, storage_mode):
+ converted = robjects.default_py2ri(obj)
+ self.assertEquals(r["mode"](converted)[0], mode)
+ self.assertEquals(r["storage.mode"](converted)[0], storage_mode)
+ self.assertEquals(list(obj), list(converted))
+ self.assertTrue(r["is.array"](converted)[0])
+
+ def testSimple(self):
+ if numpy is None:
+ return
+ b = numpy.array([True, False, True], dtype=numpy.bool_)
+ self.checkSimple(b, "logical", "logical")
+ i = numpy.array([1, 2, 3], dtype="i")
+ self.checkSimple(i, "numeric", "integer")
+ f = numpy.array([1, 2, 3], dtype="f")
+ self.checkSimple(f, "numeric", "double")
+ c = numpy.array([1j, 2j, 3j], dtype=numpy.complex_)
+ self.checkSimple(c, "complex", "complex")
+ s = numpy.array(["a", "b", "c"], dtype="S")
+ self.checkSimple(s, "character", "character")
+ u = numpy.array([u"a", u"b", u"c"], dtype="U")
+ self.checkSimple(u, "character", "character")
+
+ def testMultiDim(self):
+ if numpy is None:
+ return
+ i2d = numpy.array([[1, 2, 3], [4, 5, 6]], dtype="i")
+ i2d_r = robjects.default_py2ri(i2d)
+ self.assertEquals(r["storage.mode"](i2d_r)[0], "integer")
+ self.assertEquals(tuple(r["dim"](i2d_r)), (2, 3))
+ # Make sure we got the row/column swap right:
+ self.assertEquals(i2d_r.subset(1, 2)[0], i2d[0, 1])
+
+ f3d = numpy.arange(24, dtype="f").reshape((2, 3, 4))
+ f3d_r = robjects.default_py2ri(f3d)
+ self.assertEquals(r["storage.mode"](f3d_r)[0], "double")
+ self.assertEquals(tuple(r["dim"](f3d_r)), (2, 3, 4))
+ # Make sure we got the row/column swap right:
+ self.assertEquals(f3d_r.subset(1, 2, 3)[0], f3d[0, 1, 2])
+
+ def testObjectArray(self):
+ if numpy is None:
+ return
+ o = numpy.array([1, "a", 3.2], dtype=numpy.object_)
+ o_r = robjects.default_py2ri(o)
+ self.assertEquals(r["mode"](o_r)[0], "list")
+ self.assertEquals(r["[["](o_r, 1)[0], 1)
+ self.assertEquals(r["[["](o_r, 2)[0], "a")
+ self.assertEquals(r["[["](o_r, 3)[0], 3.2)
+
+ def testRecordArray(self):
+ if numpy is None:
+ return
+ rec = numpy.array([(1, 2.3), (2, -0.7), (3, 12.1)],
+ dtype=[("count", "i"), ("value", numpy.double)])
+ rec_r = robjects.default_py2ri(rec)
+ self.assertTrue(r["is.data.frame"](rec_r)[0])
+ self.assertEquals(tuple(r["names"](rec_r)), ("count", "value"))
+ count_r = r["$"](rec_r, "count")
+ value_r = r["$"](rec_r, "value")
+ self.assertEquals(r["storage.mode"](count_r)[0], "integer")
+ self.assertEquals(r["storage.mode"](value_r)[0], "double")
+ self.assertEquals(count_r[1], 2)
+ self.assertEquals(value_r[2], 12.1)
+
+ def testBadArray(self):
+ if numpy is None:
+ return
+ u = numpy.array([1, 2, 3], dtype=numpy.uint32)
+ self.assertRaises(ValueError, robjects.default_py2ri, u)
+
+def suite():
+ return unittest.TestLoader().loadTestsFromTestCase(NumpyConversionsTestCase)
+
+if __name__ == '__main__':
+ unittest.main()
Index: rpy/robjects/__init__.py
===================================================================
--- rpy/robjects/__init__.py (revision 696)
+++ rpy/robjects/__init__.py (working copy)
@@ -11,6 +11,10 @@
import itertools
import rpy2.rinterface as rinterface
import rpy2.rlike.container as rlc
+try:
+ import numpy
+except ImportError:
+ numpy = None
#FIXME: close everything when leaving (check RPy for that).
@@ -87,6 +91,50 @@
res = r.list(*[ri2py(py2ri(x)) for x in o])
elif isinstance(o, complex):
res = rinterface.SexpVector([o, ], rinterface.CPLXSXP)
+ elif numpy is not None and isinstance(o, numpy.ndarray):
+ if not o.dtype.isnative:
+ raise(ValueError("Cannot pass numpy arrays with non-native byte orders at the moment."))
+ # Record arrays map to data frames:
+ if o.dtype.kind == "V":
+ if o.dtype.names is None:
+ raise(ValueError("Nothing can be done for this numpy array type at the moment."))
+ # The possible kind codes are listed at
+ # http://numpy.scipy.org/array_interface.shtml
+ kinds = {
+ # "t" -> not really supported by numpy
+ "b": rinterface.LGLSXP,
+ "i": rinterface.INTSXP,
+ # "u" -> special-cased below
+ "f": rinterface.REALSXP,
+ "c": rinterface.CPLXSXP,
+ # "O" -> special-cased below
+ "S": rinterface.STRSXP,
+ "U": rinterface.STRSXP,
+ # "V" -> special-cased below
+ }
+ # Most types map onto R arrays:
+ if o.dtype.kind in kinds:
+ # "F" means "use column-major order"
+ vec = rinterface.SexpVector(o.ravel("F"), kinds[o.dtype.kind])
+ dim = rinterface.SexpVector(o.shape, rinterface.INTSXP)
+ res = r.array(vec, dim=dim)
+ # R does not support unsigned types:
+ elif o.dtype.kind == "u":
+ raise(ValueError("Cannot convert numpy array of unsigned values -- R does not have unsigned integers."))
+ # Array-of-PyObject is treated like a Python list:
+ elif o.dtype.kind == "O":
+ res = py2ri(list(o))
+ # Record arrays map onto R data frames:
+ elif o.dtype.kind == "V":
+ if o.dtype.names is None:
+ raise(ValueError("Nothing can be done for this numpy array type %s at the moment." % (o.dtype,)))
+ df_args = []
+ for field_name in o.dtype.names:
+ df_args.append((field_name, py2ri(o[field_name])))
+ res = baseNameSpaceEnv["data.frame"].rcall(tuple(df_args))
+ # It should be impossible to get here:
+ else:
+ raise(ValueError("Unknown numpy array type."))
else:
raise(ValueError("Nothing can be done for the type %s at the moment." %(type(o))))
return res
@@ -405,7 +453,7 @@
def __call__(self, *args, **kwargs):
new_args = [py2ri(a) for a in args]
- new_kwargs = {}
+ new_kwargs = {}
for k, v in kwargs.iteritems():
new_kwargs[k] = py2ri(v)
res = super(RFunction, self).__call__(*new_args, **new_kwargs)
@@ -500,7 +548,7 @@
def __getitem__(self, item):
res = rinterface.globalEnv.get(item)
- res = ri2py(res)
+ res = ri2py(res)
return res
#FIXME: check that this is properly working
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
rpy-list mailing list
rpy-list@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rpy-list