rpy2 is awesome.

But I was disconcerted to discover that passing a numpy array to an R
function just makes things blow up ("Nothing can be done for that
type...") -- who uses rpy without numpy? :-)  So I fixed it.

Strategy:
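 * most numpy arrays (boolean, signed integer, float, complex, string and
   unicode dtypes) are mapped to R arrays (shape is preserved)
 * a numpy object array is treated like a python list, and mapped to an R list
 * a numpy record array is mapped to an R data frame
 * unsigned integer arrays and arrays with a non-native byte order are
   rejected with a ValueError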
The patch does not introduce a hard dependency on numpy (the import is
optional, and the new code path is skipped when numpy is absent), is
against current trunk, and includes complete tests.

Yes/no?

-- Nathaniel
Index: rpy/robjects/tests/__init__.py
===================================================================
--- rpy/robjects/tests/__init__.py	(revision 696)
+++ rpy/robjects/tests/__init__.py	(working copy)
@@ -8,6 +8,7 @@
 import testRFunction
 import testREnvironment
 import testRobjects
+import testNumpyConversions
 
 def suite():
     suite_RObject = testRObject.suite()
@@ -18,6 +19,7 @@
     suite_REnvironment = testREnvironment.suite()
     suite_RFormula = testRFormula.suite()
     suite_Robjects = testRobjects.suite()
+    suite_NumpyConversions = testNumpyConversions.suite()
     alltests = unittest.TestSuite([suite_RObject,
                                    suite_RVector,                   
                                    suite_RArray,
@@ -25,7 +27,9 @@
                                    suite_RFunction,
                                    suite_REnvironment,
                                    suite_RFormula,
-                                   suite_Robjects ])
+                                   suite_Robjects,
+                                   suite_NumpyConversions,
+                                   ])
     return alltests
 
 def main():
Index: rpy/robjects/tests/testNumpyConversions.py
===================================================================
--- rpy/robjects/tests/testNumpyConversions.py	(revision 0)
+++ rpy/robjects/tests/testNumpyConversions.py	(revision 0)
@@ -0,0 +1,86 @@
+import unittest
+import rpy2.robjects as robjects
+r = robjects.r
+
+try:
+    import numpy
+except ImportError:
+    numpy = None
+
+class NumpyConversionsTestCase(unittest.TestCase):
+    def checkSimple(self, obj, mode, storage_mode):
+        converted = robjects.default_py2ri(obj)
+        self.assertEquals(r["mode"](converted)[0], mode)
+        self.assertEquals(r["storage.mode"](converted)[0], storage_mode)
+        self.assertEquals(list(obj), list(converted))
+        self.assertTrue(r["is.array"](converted)[0])
+
+    def testSimple(self):
+        if numpy is None:
+            return
+        b = numpy.array([True, False, True], dtype=numpy.bool_)
+        self.checkSimple(b, "logical", "logical")
+        i = numpy.array([1, 2, 3], dtype="i")
+        self.checkSimple(i, "numeric", "integer")
+        f = numpy.array([1, 2, 3], dtype="f")
+        self.checkSimple(f, "numeric", "double")
+        c = numpy.array([1j, 2j, 3j], dtype=numpy.complex_)
+        self.checkSimple(c, "complex", "complex")
+        s = numpy.array(["a", "b", "c"], dtype="S")
+        self.checkSimple(s, "character", "character")
+        u = numpy.array([u"a", u"b", u"c"], dtype="U")
+        self.checkSimple(u, "character", "character")
+
+    def testMultiDim(self):
+        if numpy is None:
+            return
+        i2d = numpy.array([[1, 2, 3], [4, 5, 6]], dtype="i")
+        i2d_r = robjects.default_py2ri(i2d)
+        self.assertEquals(r["storage.mode"](i2d_r)[0], "integer")
+        self.assertEquals(tuple(r["dim"](i2d_r)), (2, 3))
+        # Make sure we got the row/column swap right:
+        self.assertEquals(i2d_r.subset(1, 2)[0], i2d[0, 1])
+
+        f3d = numpy.arange(24, dtype="f").reshape((2, 3, 4))
+        f3d_r = robjects.default_py2ri(f3d)
+        self.assertEquals(r["storage.mode"](f3d_r)[0], "double")
+        self.assertEquals(tuple(r["dim"](f3d_r)), (2, 3, 4))
+        # Make sure we got the row/column swap right:
+        self.assertEquals(f3d_r.subset(1, 2, 3)[0], f3d[0, 1, 2])
+
+    def testObjectArray(self):
+        if numpy is None:
+            return
+        o = numpy.array([1, "a", 3.2], dtype=numpy.object_)
+        o_r = robjects.default_py2ri(o)
+        self.assertEquals(r["mode"](o_r)[0], "list")
+        self.assertEquals(r["[["](o_r, 1)[0], 1)
+        self.assertEquals(r["[["](o_r, 2)[0], "a")
+        self.assertEquals(r["[["](o_r, 3)[0], 3.2)
+
+    def testRecordArray(self):
+        if numpy is None:
+            return
+        rec = numpy.array([(1, 2.3), (2, -0.7), (3, 12.1)],
+                          dtype=[("count", "i"), ("value", numpy.double)])
+        rec_r = robjects.default_py2ri(rec)
+        self.assertTrue(r["is.data.frame"](rec_r)[0])
+        self.assertEquals(tuple(r["names"](rec_r)), ("count", "value"))
+        count_r = r["$"](rec_r, "count")
+        value_r = r["$"](rec_r, "value")
+        self.assertEquals(r["storage.mode"](count_r)[0], "integer")
+        self.assertEquals(r["storage.mode"](value_r)[0], "double")
+        self.assertEquals(count_r[1], 2)
+        self.assertEquals(value_r[2], 12.1)
+
+    def testBadArray(self):
+        if numpy is None:
+            return
+        u = numpy.array([1, 2, 3], dtype=numpy.uint32)
+        self.assertRaises(ValueError, robjects.default_py2ri, u)
+
+def suite():
+    return unittest.TestLoader().loadTestsFromTestCase(NumpyConversionsTestCase)
+
+if __name__ == '__main__':
+    unittest.main()
Index: rpy/robjects/__init__.py
===================================================================
--- rpy/robjects/__init__.py	(revision 696)
+++ rpy/robjects/__init__.py	(working copy)
@@ -11,6 +11,10 @@
 import itertools
 import rpy2.rinterface as rinterface
 import rpy2.rlike.container as rlc
+try:
+    import numpy
+except ImportError:
+    numpy = None
 
 #FIXME: close everything when leaving (check RPy for that).
 
@@ -87,6 +91,50 @@
         res = r.list(*[ri2py(py2ri(x)) for x in o])
     elif isinstance(o, complex):
         res = rinterface.SexpVector([o, ], rinterface.CPLXSXP)
+    elif numpy is not None and isinstance(o, numpy.ndarray):
+        if not o.dtype.isnative:
+            raise(ValueError("Cannot pass numpy arrays with non-native byte orders at the moment."))
+        # Void ("V") arrays without field names are not record arrays; reject them:
+        if o.dtype.kind == "V":
+            if o.dtype.names is None:
+                raise(ValueError("Nothing can be done for this numpy array type at the moment."))
+        # The possible kind codes are listed at
+        #   http://numpy.scipy.org/array_interface.shtml
+        kinds = {
+            # "t" -> not really supported by numpy
+            "b": rinterface.LGLSXP,
+            "i": rinterface.INTSXP,
+            # "u" -> special-cased below
+            "f": rinterface.REALSXP,
+            "c": rinterface.CPLXSXP,
+            # "O" -> special-cased below
+            "S": rinterface.STRSXP,
+            "U": rinterface.STRSXP,
+            # "V" -> special-cased below
+            }
+        # Most types map onto R arrays:
+        if o.dtype.kind in kinds:
+            # "F" means "use column-major order"
+            vec = rinterface.SexpVector(o.ravel("F"), kinds[o.dtype.kind])
+            dim = rinterface.SexpVector(o.shape, rinterface.INTSXP)
+            res = r.array(vec, dim=dim)
+        # R does not support unsigned types:
+        elif o.dtype.kind == "u":
+            raise(ValueError("Cannot convert numpy array of unsigned values -- R does not have unsigned integers."))
+        # Array-of-PyObject is treated like a Python list:
+        elif o.dtype.kind == "O":
+            res = py2ri(list(o))
+        # Record arrays map onto R data frames:
+        elif o.dtype.kind == "V":
+            if o.dtype.names is None:
+                raise(ValueError("Nothing can be done for this numpy array type %s at the moment." % (o.dtype,)))
+            df_args = []
+            for field_name in o.dtype.names:
+                df_args.append((field_name, py2ri(o[field_name])))
+            res = baseNameSpaceEnv["data.frame"].rcall(tuple(df_args))
+        # It should be impossible to get here:
+        else:
+            raise(ValueError("Unknown numpy array type."))
     else:
         raise(ValueError("Nothing can be done for the type %s at the moment." %(type(o))))
     return res
@@ -405,7 +453,7 @@
 
     def __call__(self, *args, **kwargs):
         new_args = [py2ri(a) for a in args]
-	new_kwargs = {}
+        new_kwargs = {}
         for k, v in kwargs.iteritems():
             new_kwargs[k] = py2ri(v)
         res = super(RFunction, self).__call__(*new_args, **new_kwargs)
@@ -500,7 +548,7 @@
     def __getitem__(self, item):
         res = rinterface.globalEnv.get(item)
             
-	res = ri2py(res)
+        res = ri2py(res)
         return res
 
     #FIXME: check that this is properly working
-------------------------------------------------------------------------
This SF.Net email is sponsored by the Moblin Your Move Developer's challenge
Build the coolest Linux based applications with Moblin SDK & win great prizes
Grand prize is a trip for two to an Open Source event anywhere in the world
http://moblin-contest.org/redirect.php?banner_id=100&url=/
_______________________________________________
rpy-list mailing list
rpy-list@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/rpy-list

Reply via email to