Changeset: 9c7713cfbb6f for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9c7713cfbb6f Added Files: sql/backends/monet5/UDF/pyapi/conversion.c sql/backends/monet5/UDF/pyapi/conversion.h sql/backends/monet5/UDF/pyapi/undef.h Removed Files: sql/backends/monet5/UDF/pyapi/unspecified_evil.h Modified Files: sql/backends/monet5/UDF/pyapi/Makefile.ag sql/backends/monet5/UDF/pyapi/connection.c sql/backends/monet5/UDF/pyapi/connection.h sql/backends/monet5/UDF/pyapi/convert_loops.h sql/backends/monet5/UDF/pyapi/emit.c sql/backends/monet5/UDF/pyapi/formatinput.h sql/backends/monet5/UDF/pyapi/pyapi.c sql/backends/monet5/UDF/pyapi/pyapi.h sql/backends/monet5/UDF/pyapi/pyloader.c sql/backends/monet5/UDF/pyapi/pytypes.c sql/backends/monet5/UDF/pyapi/pytypes.h sql/backends/monet5/UDF/pyapi/type_conversion.h sql/backends/monet5/UDF/pyapi/unicode.h Branch: default Log Message:
Clean up Python UDF code. Split up the code in several different files and isolate conversion code from UDF code so it can be used without pulling in the UDFs. diffs (truncated from 2627 to 300 lines): diff --git a/sql/backends/monet5/UDF/pyapi/Makefile.ag b/sql/backends/monet5/UDF/pyapi/Makefile.ag --- a/sql/backends/monet5/UDF/pyapi/Makefile.ag +++ b/sql/backends/monet5/UDF/pyapi/Makefile.ag @@ -24,7 +24,7 @@ MTSAFE lib__pyapi = { MODULE DIR = libdir/monetdb5 - SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h formatinput.c formatinput.h connection.c connection.h unspecified_evil.h pyloader.c emit.h emit.c convert_loops.h + SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h formatinput.c formatinput.h connection.c connection.h undef.h pyloader.c emit.h emit.c conversion.c conversion.h convert_loops.h XDEPS = $(libpy_LIBDEP) LIBS = ../../../../../monetdb5/tools/libmonetdb5 \ ../../../../../gdk/libbat \ diff --git a/sql/backends/monet5/UDF/pyapi/connection.c b/sql/backends/monet5/UDF/pyapi/connection.c --- a/sql/backends/monet5/UDF/pyapi/connection.c +++ b/sql/backends/monet5/UDF/pyapi/connection.c @@ -1,4 +1,6 @@ +#include "pyapi.h" +#include "conversion.h" #include "connection.h" #include "type_conversion.h" #include "gdk_interprocess.h" diff --git a/sql/backends/monet5/UDF/pyapi/connection.h b/sql/backends/monet5/UDF/pyapi/connection.h --- a/sql/backends/monet5/UDF/pyapi/connection.h +++ b/sql/backends/monet5/UDF/pyapi/connection.h @@ -8,7 +8,8 @@ /* * M. Raasveldt - * + * The connection object is a Python object that can be used + * to query the database from within UDFs (i.e. loopback queries) */ #ifndef _LOOPBACK_QUERY_ @@ -17,10 +18,21 @@ #include "pytypes.h" #include "emit.h" +// The QueryStruct is used to send queries between a forked process and the main server +typedef struct { + bool pending_query; + char query[8192]; + int nr_cols; + int mmapid; + size_t memsize; +} QueryStruct; + typedef struct { PyObject_HEAD Client cntxt; - bit mapped; + bit mapped; /* indicates whether or not the connection is in a forked process + * (i.e. have to use interprocess communication to transfer query results) + */ QueryStruct *query_ptr; int query_sem; } Py_ConnectionObject; diff --git a/sql/backends/monet5/UDF/pyapi/conversion.c b/sql/backends/monet5/UDF/pyapi/conversion.c new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/UDF/pyapi/conversion.c @@ -0,0 +1,937 @@ + +#include "conversion.h" +#include "convert_loops.h" +#include "pytypes.h" +#include "type_conversion.h" +#include "unicode.h" +#include "gdk_interprocess.h" + +CREATE_SQL_FUNCTION_PTR(str,batbte_dec2_dbl); +CREATE_SQL_FUNCTION_PTR(str,batsht_dec2_dbl); +CREATE_SQL_FUNCTION_PTR(str,batint_dec2_dbl); +CREATE_SQL_FUNCTION_PTR(str,batlng_dec2_dbl); +#ifdef HAVE_HGE +CREATE_SQL_FUNCTION_PTR(str,bathge_dec2_dbl); +#endif +CREATE_SQL_FUNCTION_PTR(str,batstr_2time_timestamp); +CREATE_SQL_FUNCTION_PTR(str,batstr_2time_daytime); +CREATE_SQL_FUNCTION_PTR(str,batstr_2_date); +CREATE_SQL_FUNCTION_PTR(str,batdbl_num2dec_lng); + +//! Wrapper to get eclass of SQL type +int GetSQLType(sql_subtype *sql_subtype); + +PyObject *PyArrayObject_FromScalar(PyInput* inp, char **return_message) +{ + PyObject *vararray = NULL; + char *msg = NULL; + assert(inp->scalar); //input has to be a scalar + + switch(inp->bat_type) + { + case TYPE_bit: + vararray = PyInt_FromLong((long)(*(bit*)inp->dataptr)); + break; + case TYPE_bte: + vararray = PyInt_FromLong((long)(*(bte*)inp->dataptr)); + break; + case TYPE_sht: + vararray = PyInt_FromLong((long)(*(sht*)inp->dataptr)); + break; + case TYPE_int: + vararray = PyInt_FromLong((long)(*(int*)inp->dataptr)); + break; + case TYPE_lng: + vararray = PyLong_FromLongLong((*(lng*)inp->dataptr)); + break; + case TYPE_flt: + vararray = PyFloat_FromDouble((double)(*(flt*)inp->dataptr)); + break; + case TYPE_dbl: + vararray = PyFloat_FromDouble((double)(*(dbl*)inp->dataptr)); + break; +#ifdef HAVE_HGE + case TYPE_hge: + vararray = PyLong_FromHge(*((hge *) inp->dataptr)); + break; +#endif + case TYPE_str: + vararray = PyUnicode_FromString(*((char**) inp->dataptr)); + break; + default: + msg = createException(MAL, "pyapi.eval", "Unsupported scalar type %i.", inp->bat_type); + goto wrapup; + } + if (vararray == NULL) + { + msg = createException(MAL, "pyapi.eval", "Something went wrong converting the MonetDB scalar to a Python scalar."); + goto wrapup; + } +wrapup: + *return_message = msg; + return vararray; +} + +PyObject *PyMaskedArray_FromBAT(PyInput *inp, size_t t_start, size_t t_end, char **return_message, bool copy) +{ + BAT *b = inp->bat; + char *msg; + PyObject *vararray; + + vararray = PyArrayObject_FromBAT(inp, t_start, t_end, return_message, copy); + if (vararray == NULL) { + return NULL; + } + // To deal with null values, we use the numpy masked array structure + // The masked array structure is an object with two arrays of equal size, a data array and a mask array + // The mask array is a boolean array that has the value 'True' when the element is NULL, and 'False' otherwise + // If the BAT has Null values, we construct this masked array + if (!(b->tnil == 0 && b->tnonil == 1)) + { + PyObject *mask; + PyObject *mafunc = PyObject_GetAttrString(PyImport_Import(PyString_FromString("numpy.ma")), "masked_array"); + PyObject *maargs; + PyObject *nullmask = PyNullMask_FromBAT(b, t_start, t_end); + + if (nullmask == Py_None) { + maargs = PyTuple_New(1); + PyTuple_SetItem(maargs, 0, vararray); + } else { + maargs = PyTuple_New(2); + PyTuple_SetItem(maargs, 0, vararray); + PyTuple_SetItem(maargs, 1, (PyObject*) nullmask); + } + + // Now we will actually construct the mask by calling the masked array constructor + mask = PyObject_CallObject(mafunc, maargs); + if (!mask) { + msg = createException(MAL, "pyapi.eval", "Failed to create mask"); + goto wrapup; + } + Py_DECREF(maargs); + Py_DECREF(mafunc); + + vararray = mask; + } + return vararray; +wrapup: + *return_message = msg; + return NULL; +} + +PyObject *PyArrayObject_FromBAT(PyInput *inp, size_t t_start, size_t t_end, char **return_message, bool copy) +{ + // This variable will hold the converted Python object + PyObject *vararray = NULL; + char *msg; + size_t j = 0; + BUN p = 0, q = 0; + BATiter li; + BAT *b = inp->bat; + npy_intp elements[1] = { t_end-t_start }; + + assert(!inp->scalar); //input has to be a BAT + + if (!b) { + // No BAT was found, we can't do anything in this case + msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL" bat."); + goto wrapup; + } + + if (!IsStandardBATType(inp->bat_type) || ConvertableSQLType(inp->sql_subtype)) { // if the sql type is set, we have to do some conversion + if (inp->scalar) { + // todo: scalar SQL types + msg = createException(MAL, "pyapi.eval", "Scalar SQL types haven't been implemented yet... sorry"); + goto wrapup; + } else { + BAT *ret_bat = NULL; + msg = ConvertFromSQLType(inp->bat, inp->sql_subtype, &ret_bat, &inp->bat_type); + if (msg != MAL_SUCCEED) { + msg = createException(MAL, "pyapi.eval", "Failed to convert BAT."); + goto wrapup; + } + BBPunfix(inp->bat->batCacheid); + inp->bat = ret_bat; + } + } + + b = inp->bat; + + switch (inp->bat_type) { + case TYPE_bte: + BAT_TO_NP(b, bte, NPY_INT8); + break; + case TYPE_sht: + BAT_TO_NP(b, sht, NPY_INT16); + break; + case TYPE_int: + BAT_TO_NP(b, int, NPY_INT32); + break; + case TYPE_lng: + BAT_TO_NP(b, lng, NPY_INT64); + break; + case TYPE_flt: + BAT_TO_NP(b, flt, NPY_FLOAT32); + break; + case TYPE_dbl: + BAT_TO_NP(b, dbl, NPY_FLOAT64); + break; + case TYPE_str: + { + bool unicode = false; + li = bat_iterator(b); + //create a NPY_OBJECT array object + vararray = PyArray_New( + &PyArray_Type, + 1, + elements, + NPY_OBJECT, + NULL, + NULL, + 0, + 0, + NULL); + + BATloop(b, p, q) { + char *t = (char *) BUNtail(li, p); + for(; *t != 0; t++) { + if (*t < 0) { + unicode = true; + break; + } + } + if (unicode) { + break; + } + } + + { + PyObject **data = ((PyObject**)PyArray_DATA((PyArrayObject*)vararray)); + PyObject *obj; + j = 0; + if (unicode) { + if (GDK_ELIMDOUBLES(b->tvheap)) { + PyObject** pyptrs = GDKzalloc(b->tvheap->free * sizeof(PyObject*)); + if (!pyptrs) { + msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL" PyObject strings."); + goto wrapup; + } + BATloop(b, p, q) { + const char *t = (const char *) BUNtail(li, p); + ptrdiff_t offset = t - b->tvheap->base; + if (!pyptrs[offset]) { + if (strcmp(t, str_nil) == 0) { + //str_nil isn't a valid UTF-8 character (it's 0x80), so we can't decode it as UTF-8 (it will throw an error) + pyptrs[offset] = PyUnicode_FromString("-"); + } else { + //otherwise we can just decode the string as UTF-8 + pyptrs[offset] = PyUnicode_FromString(t); + } + if (!pyptrs[offset]) { + msg = createException(MAL, "pyapi.eval", "Failed to create string."); + goto wrapup; + } + } else { + Py_INCREF(pyptrs[offset]); + } + data[j++] = pyptrs[offset]; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list