Changeset: 521aa4d78816 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=521aa4d78816
Modified Files:
    configure.ag
    monetdb5/extras/pyapi/Tests/pyapi00.malC
    monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:
Python Integration: NumPy arrays part 1 diffs (205 lines): diff --git a/configure.ag b/configure.ag --- a/configure.ag +++ b/configure.ag @@ -2279,7 +2279,7 @@ if test "x$enable_pyintegration" != xno; enable_pyintegration=no disable_pyintegration="(python-config command not found)" fi - elif libpy_CFLAGS=`$PYCMD --includes ` && libpy_LIBS=`$PYCMD --ldflags`; then + elif libpy_CFLAGS=`\`$PYCMD --exec-prefix\`/bin/python -c "from distutils.sysconfig import get_python_inc; import numpy; print ' -I' + get_python_inc() + ' -I' + numpy.get_include();"` && libpy_LIBS=`$PYCMD --ldflags`; then have_libpy=yes AC_DEFINE(HAVE_LIBPY, 1, [Define if we can link to python]) AC_SUBST(libpy_CFLAGS, $libpy_CFLAGS) diff --git a/monetdb5/extras/pyapi/Tests/pyapi00.malC b/monetdb5/extras/pyapi/Tests/pyapi00.malC --- a/monetdb5/extras/pyapi/Tests/pyapi00.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi00.malC @@ -12,7 +12,7 @@ bat.append(b,1189641421); io.print(b); -r:bat[:oid,:int] := pyapi.eval(nil:ptr,"int(arg1/1000)",b); +r:bat[:oid,:int] := pyapi.eval(nil:ptr,"r=[e**2 for e in arg1]\nreturn ([numpy.asarray(r)])",b); io.print(r); diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -21,7 +21,9 @@ #undef _POSIX_C_SOURCE #include <Python.h> -// other headers +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include <numpy/arrayobject.h> + #include <string.h> const char* pyapi_enableflag = "embedded_py"; @@ -64,16 +66,12 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st size_t pos; char* rcall = NULL; size_t rcalllen; - size_t ret_rows = 0; - //int ret_cols = 0; /* int because pci->retc is int, too*/ str *args; - //int evalErr; char *msg = MAL_SUCCEED; BAT *b = NULL; - BUN cnt; node * argnode; int seengrp = FALSE; - PyObject *pArgs; // this is going to be the parameter tuple + PyObject *pArgs, *pResult; // this is going to be the parameter tuple if (!PyAPIEnabled()) { throw(MAL, "pyapi.eval", @@ 
-106,7 +104,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st argnode = argnode->next; } } - pArgs = PyTuple_New(pci->argc - pci->retc + 2); // the first unknown argument is the group, we don't really care for the rest. for (i = pci->retc + 2; i < pci->argc; i++) { @@ -121,10 +118,12 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } + // create function argument tuple, we pass a tuple of numpy arrays + pArgs = PyTuple_New(pci->argc-(pci->retc + 2)); + // for each input column (BAT): for (i = pci->retc + 2; i < pci->argc; i++) { - PyObject *varlist = NULL; - size_t j; + PyObject *vararray = NULL; // turn scalars into one-valued BATs // TODO: also do this for Python? Or should scalar values be 'simple' variables? @@ -149,19 +148,14 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } - varlist = PyList_New(BATcount(b)); switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) { case TYPE_int: - // BAT_TO_INTSXP(b, int, varvalue); - for (j = 0; j < BATcount(b); j++) { - int v = ((int*) Tloc(b, BUNfirst(b)))[j]; - //if ( v == int_nil) - // PyList_SET_ITEM(varlist, j, ); - //else - // TODO: use numpy arrays here, readonly, ignore NULLs for now? - PyList_SET_ITEM(varlist, j, PyInt_FromLong(v)); - } + // yeah yeah yeah + vararray = PyArray_New(&PyArray_Type, 1, (npy_intp[1]) {BATcount(b)}, NPY_INT32, NULL, + (int*) Tloc(b, BUNfirst(b)), 0, NPY_ARRAY_CARRAY || !NPY_ARRAY_WRITEABLE, NULL); break; + // TODO: handle NULLs! + // TODO: implement other types default: msg = createException(MAL, "pyapi.eval", "unknown argument type "); @@ -169,7 +163,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } BBPunfix(b->batCacheid); - PyTuple_SetItem(pArgs, ai++, varlist); + PyTuple_SetItem(pArgs, ai++, vararray); } pos = 0; @@ -192,20 +186,19 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st #endif // parse the code and create the function - // TODO: do this in a temporary namespace, how? + // TODO: do this in a temporary namespace? Later... 
+ // TODO: actually include user code - // TODO: use numpy arrays for columns! - // TODO: How do we make nice indentation so it parses? Force user code to use level-1 indentation? + // TODO: Indent user code: Search for newline-tab, if there, add tabs, if not, add single space in front of every line. Thanks Sjoerd! { int pyret; - PyObject *pFunc, *pModule, *pResult; - + PyObject *pFunc, *pModule; + // TODO: check whether this succeeds pModule = PyImport_Import(PyString_FromString("__main__")); - pyret = PyRun_SimpleString("def pyfun(x):\n print(x)\n return list(([e+1 for e in x],1))"); + pyret = PyRun_SimpleString("def pyfun(x):\n import numpy as np\n r=[e+1 for e in x]\n return ([np.asarray(r)])"); pFunc = PyObject_GetAttrString(pModule, "pyfun"); if (pyret != 0 || !pModule || !pFunc || !PyCallable_Check(pFunc)) { - // TODO: include parsed code msg = createException(MAL, "pyapi.eval", "could not parse Python code %s", rcall); goto wrapup; // shudder } @@ -213,8 +206,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st // TODO: use other interface, here we can assign each value. We know how many params there will be // this is it pResult = PyObject_CallObject(pFunc, pArgs); - if (!pResult || !PyList_Check(pResult) || !PyList_Size(pResult)) { - msg = createException(MAL, "pyapi.eval", "invalid result object"); + if (!pResult || !PyList_Check(pResult) || PyList_Size(pResult) != pci->retc) { + msg = createException(MAL, "pyapi.eval", "Invalid result object. 
Need list of size %d containing numpy arrays", pci->retc); goto wrapup; } // delete the function again @@ -223,12 +216,33 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st // collect the return values for (i = 0; i < pci->retc; i++) { + PyObject * pColO = PyList_GetItem(pResult, i); int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i))); - cnt = (BUN) ret_rows; switch (bat_type) { case TYPE_int: { - // TODO + int *p; + BUN j; + // this only copies if it has to + PyArrayObject* pCol = (PyArrayObject*) PyArray_FromAny(pColO, + PyArray_DescrFromType(NPY_INT32), 1, 1, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); + //size_t cnt = pCol->dimensions[0]; + size_t cnt = 5; + // TODO: get actual length from array + + // TODO null rewriting, we are guaranteed to be able to write to this + // TODO: only accepted masked array as output? + // TODO check whether the length of our output + + /* We would like to simply pass over the BAT from numpy, + * but cannot due to malloc/free incompatibility */ + b = BATnew(TYPE_void, TYPE_int, cnt, TRANSIENT); + BATseqbase(b, 0); b->T->nil = 0; b->T->nonil = 1; b->tkey = 0; + b->tsorted = 0; b->trevsorted = 0; + p = (int*) Tloc(b, BUNfirst(b)); \ + for( j =0; j< cnt; j++, p++){ + *p = (int) PyArray_GETPTR1(pCol, j); + } break; } // TODO: implement other types @@ -239,7 +253,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st bat_type); goto wrapup; } - BATsetcount(b, cnt); // bat return if (isaBatType(getArgType(mb,pci,i))) { @@ -265,7 +278,9 @@ str PyAPIprelude(void *ret) { MT_lock_set(&pyapiLock, "pyapi.evaluate"); /* startup internal Python environment */ if (!pyapiInitialized) { + char* iar = NULL; Py_Initialize(); + import_array1(iar); pyapiInitialized++; } MT_lock_unset(&pyapiLock, "pyapi.evaluate"); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list