Changeset: aefe683f0b5e for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=aefe683f0b5e
Modified Files:
    monetdb5/extras/pyapi/Tests/pyapi02.malC
    monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:
Python API: mask array (working once, unclear why) diffs (207 lines): diff --git a/monetdb5/extras/pyapi/Tests/pyapi02.malC b/monetdb5/extras/pyapi/Tests/pyapi02.malC --- a/monetdb5/extras/pyapi/Tests/pyapi02.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi02.malC @@ -8,16 +8,16 @@ bat.append(bbit,0:bit); bat.append(bbit,1:bit); bat.append(bbit,0:bit); bat.append(bbit,nil:bit); -rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbit); +rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"print(arg1)\nreturn([numpy.add(arg1.filled(0),42)])",bbit); io.print(rbit); bbte:= bat.new(:oid,:bte); bat.append(bbte,42:bte); bat.append(bbte,84:bte); -bat.append(bbte,254:bte); +bat.append(bbte,111:bte); bat.append(bbte,0:bte); bat.append(bbte,nil:bte); -rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbte); +rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([[42,43]])",bbte); io.print(rbte); bsht:= bat.new(:oid,:sht); @@ -26,7 +26,7 @@ bat.append(bsht,82:sht); bat.append(bsht,0:sht); bat.append(bsht,3276:sht); bat.append(bsht,nil:sht); -rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bsht); +rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bsht); io.print(rsht); bint:= bat.new(:oid,:int); @@ -35,7 +35,7 @@ bat.append(bint,846930886:int); bat.append(bint,1681692777:int); bat.append(bint,1714636915:int); bat.append(bint,nil:int); -rint:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bint); +rint:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bint); io.print(rint); bwrd:= bat.new(:oid,:wrd); @@ -43,7 +43,7 @@ bat.append(bwrd,1804289383:wrd); bat.append(bwrd,846930886:wrd); bat.append(bwrd,1681692777:wrd); bat.append(bwrd,1714636915:wrd); -rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bwrd); +rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bwrd); io.print(rwrd); blng:= 
bat.new(:oid,:lng); @@ -51,7 +51,7 @@ bat.append(blng,1804289383L); bat.append(blng,846930886L); bat.append(blng,1681692777L); bat.append(blng,1714636915L); -rlng:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",blng); +rlng:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",blng); io.print(rlng); # not sure what to with hge, numpy only supports 128 bits when sizeof(long)=16 @@ -89,7 +89,7 @@ bat.append(bstr,"asdf":str); bat.append(bstr,"sd asd asd asd asd a":str); bat.append(bstr,"":str); bat.append(bstr,nil:str); -rstr:bat[:oid,:int] := rapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr); +rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr); io.print(rstr); @@ -103,26 +103,26 @@ bat.append(binto,1681692777:int); bat.append(binto,1714636915:int); bat.append(binto,nil:int); -rintbi:bat[:oid,:int] := rapi.eval(nil:ptr,"arg1",binto); +rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"arg1",binto); io.print(rintbi); -rintbi2:bat[:oid,:int] := rapi.eval(nil:ptr,"as.integer(arg1)",binto); +rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); io.print(rintbi2); -rintbl:bat[:oid,:lng] := rapi.eval(nil:ptr,"as.integer(arg1)",binto); +rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); io.print(rintbl); -rintbh:bat[:oid,:hge] := rapi.eval(nil:ptr,"as.integer(arg1)",binto); +rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); io.print(rintbh); -rintbd:bat[:oid,:dbl] := rapi.eval(nil:ptr,"as.numeric(arg1)",binto); +rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto); io.print(rintbd); -rintbs:bat[:oid,:str] := rapi.eval(nil:ptr,"as.character(arg1)",binto); +rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto); io.print(rintbs); # factors should be strings -rintbf:bat[:oid,:str] := rapi.eval(nil:ptr,"as.factor(arg1)",binto); +rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto); io.print(rintbf); diff 
--git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -58,6 +58,7 @@ static int pyapiInitialized = FALSE; *(mtpe*) PyArray_GETPTR1(pCol, j); } \ BATsetcount(bat, cnt); } +// TODO: also handle the case if someone returns a masked array #define _PYAPI_DEBUG_ @@ -152,6 +153,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st // for each input column (BAT): for (i = pci->retc + 2; i < pci->argc; i++) { PyObject *vararray = NULL; + // null mask for masked array // turn scalars into one-valued BATs // TODO: also do this for Python? Or should scalar values be 'simple' variables? @@ -176,6 +178,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } + switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) { case TYPE_bte: vararray = BAT_TO_NP(b, bte, NPY_INT8); @@ -195,16 +198,50 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st case TYPE_dbl: vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64); break; - // TODO: handle NULLs! // TODO: implement other types (strings, boolean) default: msg = createException(MAL, "pyapi.eval", "unknown argument type "); goto wrapup; } + + // we use numpy.ma to deal with possible NULL values in the data + // once numpy comes with proper NA support, this will change + { + PyObject *mafunc = PyObject_GetAttrString(PyImport_Import( + PyString_FromString("numpy.ma")), "masked_array"); + PyObject *maargs = PyTuple_New(2); + PyArrayObject* nullmask = (PyArrayObject*) PyArray_ZEROS(1, + (npy_intp[1]) {BATcount(b)}, NPY_BOOL, 0); + + const void *nil = ATOMnilptr(b->ttype); + int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype); + BATiter bi = bat_iterator(b); + + if (b->T->nil) { + size_t j; + for (j = 0; j < BATcount(b); j++) { + if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0) { + // Houston we have a NULL + PyArray_SETITEM(nullmask, PyArray_GETPTR1(nullmask, j), Py_True); + } + } + } + PyTuple_SetItem(maargs, 0, vararray); + PyTuple_SetItem(maargs, 1, 
(PyObject*) nullmask); + vararray = PyObject_CallObject(mafunc, maargs); + if (!vararray) { + msg = createException(MAL, "pyapi.eval", "UUUH"); + goto wrapup; + } + } + PyTuple_SetItem(pArgs, ai++, vararray); + + // TODO: we cannot clean this up just yet, there may be a shallow copy referenced in python. + // TODO: do this later + BBPunfix(b->batCacheid); - PyTuple_SetItem(pArgs, ai++, vararray); } // create argument list @@ -284,11 +321,13 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st msg = createException(MAL, "pyapi.eval", "Command too large"); goto wrapup; } - { int pyret; PyObject *pFunc, *pModule; + // TODO: does this create overhead?, see if we can share the import + PyRun_SimpleString("import numpy"); + pModule = PyImport_Import(PyString_FromString("__main__")); pyret = PyRun_SimpleString(pycall); pFunc = PyObject_GetAttrString(pModule, "pyfun"); @@ -298,9 +337,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st goto wrapup; } - // TODO: does this create overhead?, see if we can share the import - PyRun_SimpleString("import numpy"); - pResult = PyObject_CallObject(pFunc, pArgs); if (PyErr_Occurred()) { PyObject *pErrType, *pErrVal, *pErrTb; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list