Changeset: 0ea25c43cfce for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0ea25c43cfce
Modified Files:
	monetdb5/extras/pyapi/pyapi.c
	sql/backends/monet5/Tests/pyapi12.sql
	sql/backends/monet5/Tests/pyapi12.stable.out
Branch: pyapi
Log Message:

Fix for returning NPY_OBJECT arrays when using PYTHON_MAP. diffs (116 lines): diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -893,8 +893,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st PyObject * pColO = NULL; // This is the PyReturn header information for the current return value, we will fill this now PyReturn *ret = &pyreturn_values[i]; - // This is the expected BAT result type (the type of BAT we have to make) - int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i))); ret->multidimensional = FALSE; // There are three possibilities (we have ensured this right after executing the Python call) @@ -936,16 +934,10 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } else { // If it isn't we need to convert pColO to the expected Numpy Array type - ret->numpy_array = NULL; - (void) bat_type; - //if (bat_type != TYPE_str) ret->numpy_array = (PyArrayObject*) PyArray_FromAny(pColO, PyArray_DescrFromType(BatType_ToPyType(bat_type)), 1, 1, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); + ret->numpy_array = PyArray_FromAny(pColO, NULL, 1, 1, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); if (ret->numpy_array == NULL) { - // If this conversion fails, we will set the expected type to NULL, this means it will automatically pick a type for us - ret->numpy_array = PyArray_FromAny(pColO, NULL, 1, 1, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); - if (ret->numpy_array == NULL) { - msg = createException(MAL, "pyapi.eval", "Could not create a Numpy array from the return type.\n"); - goto wrapup; - } + msg = createException(MAL, "pyapi.eval", "Could not create a Numpy array from the return type.\n"); + goto wrapup; } ret->result_type = PyArray_DESCR((PyArrayObject*)ret->numpy_array)->type_num; // We read the result type from the resulting array ret->memory_size = PyArray_DESCR((PyArrayObject*)ret->numpy_array)->elsize; @@ -992,8 +984,28 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st 
for (i = 0; i < pci->retc; i++) { PyReturn *ret = &pyreturn_values[i]; + ReturnBatDescr *descr = &ptr[(process_id - 1) * pci->retc + i]; - ReturnBatDescr *descr = &ptr[(process_id - 1) * pci->retc + i]; + if (ret->result_type == NPY_OBJECT) { + // We can't deal with NPY_OBJECT arrays, because these are 'arrays of pointers', so we can't just copy the content of the array into shared memory + // So if we're dealing with a NPY_OBJECT array, we convert them to a Numpy Array of type NPY_<TYPE> that corresponds with the desired BAT type + // WARNING: Because we could be converting to a NPY_STRING or NPY_UNICODE array (if the desired type is TYPE_str or TYPE_hge), this means that memory usage can explode + // because NPY_STRING/NPY_UNICODE arrays are 2D string arrays with fixed string length (so if there's one very large string the size explodes quickly) + // if someone has some problem with memory size exploding when using PYTHON_MAP but it being fine in regular PYTHON this is probably the issue + int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i))); + PyObject *new_array = PyArray_FromAny(ret->numpy_array, PyArray_DescrFromType(BatType_ToPyType(bat_type)), 1, 1, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); + if (new_array == NULL) { + msg = createException(MAL, "pyapi.eval", "Could not convert the returned NPY_OBJECT array to the desired array of type %s.\n", BatType_Format(bat_type)); + goto wrapup; + } + Py_DECREF(ret->numpy_array); //do we really care about cleaning this up, considering this only happens in a separate process that will be exited soon anyway? 
+ ret->numpy_array = new_array; + ret->result_type = PyArray_DESCR((PyArrayObject*)ret->numpy_array)->type_num; + ret->memory_size = PyArray_DESCR((PyArrayObject*)ret->numpy_array)->elsize; + ret->count = PyArray_DIMS((PyArrayObject*)ret->numpy_array)[0]; + ret->array_data = PyArray_DATA((PyArrayObject*)ret->numpy_array); + } + descr->npy_type = ret->result_type; descr->element_size = ret->memory_size; descr->bat_count = ret->count; diff --git a/sql/backends/monet5/Tests/pyapi12.sql b/sql/backends/monet5/Tests/pyapi12.sql --- a/sql/backends/monet5/Tests/pyapi12.sql +++ b/sql/backends/monet5/Tests/pyapi12.sql @@ -3,12 +3,24 @@ START TRANSACTION; CREATE TABLE rval(i integer); INSERT INTO rval VALUES (1),(2),(3),(4),(-1),(0); +# PYTHON_MAP test in WHERE CREATE FUNCTION pyapi12(i integer,z integer) returns boolean language PYTHON_MAP { return(numpy.greater(i,z)) }; SELECT * FROM rval WHERE pyapi12(i,2); DROP FUNCTION pyapi12; + + +# Return NPY_OBJECT test +CREATE FUNCTION pyapi12(i integer,z integer) returns string language PYTHON_MAP +{ + return(numpy.array(['Hello'] * len(i), dtype=object)) +}; +SELECT pyapi12(i,2) FROM rval; +DROP FUNCTION pyapi12; + + DROP TABLE rval; diff --git a/sql/backends/monet5/Tests/pyapi12.stable.out b/sql/backends/monet5/Tests/pyapi12.stable.out --- a/sql/backends/monet5/Tests/pyapi12.stable.out +++ b/sql/backends/monet5/Tests/pyapi12.stable.out @@ -74,6 +74,22 @@ Ready. [ 3 ] [ 4 ] #DROP FUNCTION pyapi12; +#CREATE FUNCTION pyapi12(i integer,z integer) returns string language P +#{ +# return(numpy.array(['Hello', 'Hello'], dtype=object)) +#}; +#SELECT pyapi12(i,2) FROM rval; +% sys.L # table_name +% pyapi12_i # name +% clob # type +% 5 # length +[ "Hello" ] +[ "Hello" ] +[ "Hello" ] +[ "Hello" ] +[ "Hello" ] +[ "Hello" ] +#DROP FUNCTION pyapi12; #DROP TABLE rval; #ROLLBACK; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list