Changeset: 6fe592bdf6cc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6fe592bdf6cc
Added Files:
    monetdb5/extras/pyapi/Tests/pyapi01.malC
Modified Files:
    monetdb5/extras/pyapi/Tests/All
    monetdb5/extras/pyapi/Tests/pyapi02.malC
    monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:
Fixed strings to work properly. Fixed a bug caused by multiple PyInterpreters. It is now possible to return a single array rather than a list containing a single array. diffs (truncated from 470 to 300 lines): diff --git a/monetdb5/extras/pyapi/Tests/All b/monetdb5/extras/pyapi/Tests/All --- a/monetdb5/extras/pyapi/Tests/All +++ b/monetdb5/extras/pyapi/Tests/All @@ -1,2 +1,3 @@ HAVE_LIBPY?pyapi00 +HAVE_LIBPY?pyapi01 HAVE_LIBPY?pyapi02 diff --git a/monetdb5/extras/pyapi/Tests/pyapi01.malC b/monetdb5/extras/pyapi/Tests/pyapi01.malC new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/Tests/pyapi01.malC @@ -0,0 +1,8 @@ +#strings testing +bstr:= bat.new(:oid,:str); +bat.append(bstr,"asdf":str); +bat.append(bstr,"sd asd asd asd asd a":str); +bat.append(bstr,"":str); +bat.append(bstr,"test":str); +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bstr); +io.print(rstr); diff --git a/monetdb5/extras/pyapi/Tests/pyapi02.malC b/monetdb5/extras/pyapi/Tests/pyapi02.malC --- a/monetdb5/extras/pyapi/Tests/pyapi02.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi02.malC @@ -2,14 +2,14 @@ # inty types -bbit:= bat.new(:oid,:bit); -bat.append(bbit,1:bit); -bat.append(bbit,0:bit); -bat.append(bbit,1:bit); -bat.append(bbit,0:bit); -bat.append(bbit,nil:bit); -rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"print(arg1)\nreturn([numpy.add(arg1.filled(0),42)])",bbit); -io.print(rbit); +#bbit:= bat.new(:oid,:bit); +#bat.append(bbit,1:bit); +#bat.append(bbit,0:bit); +#bat.append(bbit,1:bit); +#bat.append(bbit,0:bit); +#bat.append(bbit,nil:bit); +#rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bbit); +#io.print(rbit); bbte:= bat.new(:oid,:bte); bat.append(bbte,42:bte); @@ -17,14 +17,15 @@ bat.append(bbte,84:bte); bat.append(bbte,111:bte); bat.append(bbte,0:bte); bat.append(bbte,nil:bte); -rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([[42,43]])",bbte); +io.print(bbte); +rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return numpy.add(arg1, 
99)",bbte); io.print(rbte); bsht:= bat.new(:oid,:sht); bat.append(bsht,42:sht); bat.append(bsht,82:sht); bat.append(bsht,0:sht); -bat.append(bsht,3276:sht); +bat.append(bsht,240:sht); bat.append(bsht,nil:sht); rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bsht); io.print(rsht); @@ -43,6 +44,7 @@ bat.append(bwrd,1804289383:wrd); bat.append(bwrd,846930886:wrd); bat.append(bwrd,1681692777:wrd); bat.append(bwrd,1714636915:wrd); +bat.append(bwrd,nil:wrd); rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bwrd); io.print(rwrd); @@ -70,7 +72,8 @@ bat.append(bflt,18042.89383:flt); bat.append(bflt,846.930886:flt); bat.append(bflt,16.81692777:flt); bat.append(bflt,1714636.915:flt); -rflt:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bflt); +bat.append(bflt,nil:flt); +rflt:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bflt); io.print(rflt); bdbl:= bat.new(:oid,:dbl); @@ -79,18 +82,18 @@ bat.append(bdbl,84.6930886:dbl); bat.append(bdbl,168169.2777:dbl); bat.append(bdbl,17146369.15:dbl); bat.append(bdbl,nil:dbl); -rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bdbl); +rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bdbl); io.print(rdbl); # strings -bstr:= bat.new(:oid,:str); -bat.append(bstr,"asdf":str); -bat.append(bstr,"sd asd asd asd asd a":str); -bat.append(bstr,"":str); -bat.append(bstr,nil:str); -rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr); -io.print(rstr); +#bstr:= bat.new(:oid,:str); +#bat.append(bstr,"asdf":str); +#bat.append(bstr,"sd asd asd asd asd a":str); +#bat.append(bstr,"":str); +#bat.append(bstr,"test":str); +#rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"print(arg1);\nreturn([arg1])",bstr); +#io.print(rstr); @@ -103,26 +106,26 @@ bat.append(binto,1681692777:int); bat.append(binto,1714636915:int); bat.append(binto,nil:int); -rintbi:bat[:oid,:int] := 
pyapi.eval(nil:ptr,"arg1",binto); +rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"return [arg1.filled(0)]",binto); io.print(rintbi); -rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); -io.print(rintbi2); +# rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); +# io.print(rintbi2); -rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); -io.print(rintbl); +# rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); +# io.print(rintbl); -rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); -io.print(rintbh); +# rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto); +# io.print(rintbh); -rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto); -io.print(rintbd); +# rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto); +# io.print(rintbd); -rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto); -io.print(rintbs); +# rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto); +# io.print(rintbs); -# factors should be strings -rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto); -io.print(rintbf); +# # factors should be strings +# rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto); +# io.print(rintbf); diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -27,12 +27,15 @@ #include <string.h> const char* pyapi_enableflag = "embedded_py"; +char *NPYConstToString(int); +bool IsPyArrayObject(PyObject *); int PyAPIEnabled(void) { return (GDKgetenv_istrue(pyapi_enableflag) || GDKgetenv_isyes(pyapi_enableflag)); } + // TODO: exclude pyapi from mergetable, too // TODO: add to SQL layer // TODO: can we call the Python interpreter in a multi-thread environment? 
@@ -49,7 +52,14 @@ static int pyapiInitialized = FALSE; PyArrayObject* pCol = (PyArrayObject*) PyArray_FromAny(pColO, \ PyArray_DescrFromType(nptpe), 1, 1, NPY_ARRAY_CARRAY | \ NPY_ARRAY_FORCECAST, NULL); \ - size_t cnt = PyArray_DIMS(pCol)[0], j; \ + size_t cnt = 0; \ + if (pCol == NULL) \ + { \ + pCol = (PyArrayObject*) PyArray_FromAny(pColO, NULL, 1, 1, NPY_ARRAY_CARRAY, NULL); \ + msg = createException(MAL, "pyapi.eval", "Wrong return type in python function. Expected an array of type \"%s\" as return value, but the python function returned an array of type \"%s\".", #mtpe, NPYConstToString(PyArray_DTYPE(pCol)->type_num)); \ + goto wrapup; \ + } \ + cnt = PyArray_DIMS(pCol)[0], j; \ bat = BATnew(TYPE_void, TYPE_##mtpe, cnt, TRANSIENT); \ BATseqbase(bat, 0); bat->T->nil = 0; bat->T->nonil = 1; \ bat->tkey = 0; bat->tsorted = 0; bat->trevsorted = 0; \ @@ -58,6 +68,7 @@ static int pyapiInitialized = FALSE; *(mtpe*) PyArray_GETPTR1(pCol, j); } \ BATsetcount(bat, cnt); } +//todo: NULL // TODO: also handle the case if someone returns a masked array #define _PYAPI_DEBUG_ @@ -97,8 +108,12 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st node * argnode; int seengrp = FALSE; PyObject *pArgs, *pResult; // this is going to be the parameter tuple - PyThreadState* tstate; + BUN p = 0, q = 0; + BATiter li; + size_t count; + size_t maxsize; + size_t j; if (!PyAPIEnabled()) { throw(MAL, "pyapi.eval", @@ -120,7 +135,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st // this isolates our interpreter, so it's safe to run pyapi multithreaded // TODO: verify this - tstate = Py_NewInterpreter(); + /*tstate = Py_NewInterpreter();*/ // first argument after the return contains the pointer to the sql_func structure if (sqlfun != NULL && sqlfun->ops->cnt > 0) { @@ -178,7 +193,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } - switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) { case TYPE_bte: vararray = BAT_TO_NP(b, bte, NPY_INT8); @@ -198,8 +212,49 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr 
st case TYPE_dbl: vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64); break; + case TYPE_str: + li = bat_iterator(b); - // TODO: implement other types (strings, boolean) + //we first loop over all the strings in the BAT to find the maximum length of a single string + //this is because NUMPY only supports strings with a fixed maximum length + maxsize = 0; + count = BATcount(b); + BATloop(b, p, q) + { + const char *t = (const char *) BUNtail(li, p); + const size_t length = (const size_t) strlen(t); + + if (strlen(t) > maxsize) + maxsize = length; + + } + + //create a NPY_STRING array object + vararray = PyArray_New( + &PyArray_Type, + 1, + (npy_intp[1]) {count}, + NPY_STRING, + NULL, + NULL, + maxsize, + 0, + NULL); + + //fill the NPY_STRING array object using the PyArray_SETITEM function + j = 0; + BATloop(b, p, q) + { + const char *t = (const char *) BUNtail(li, p); + PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), PyString_FromString(t)); + j++; + } + break; + case TYPE_hge: + vararray = BAT_TO_NP(b, hge, NPY_LONGLONG); + break; + + // TODO: implement other types (boolean) default: msg = createException(MAL, "pyapi.eval", "unknown argument type "); goto wrapup; @@ -218,7 +273,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype); BATiter bi = bat_iterator(b); - if (b->T->nil) { + if (b->T->nil) + { size_t j; for (j = 0; j < BATcount(b); j++) { if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0) { @@ -227,8 +283,10 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } } + PyTuple_SetItem(maargs, 0, vararray); PyTuple_SetItem(maargs, 1, (PyObject*) nullmask); + vararray = PyObject_CallObject(mafunc, maargs); if (!vararray) { msg = createException(MAL, "pyapi.eval", "UUUH"); @@ -241,7 +299,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st // TODO: do this later BBPunfix(b->batCacheid); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org 
https://www.monetdb.org/mailman/listinfo/checkin-list