Changeset: 3cb28e873fd1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cb28e873fd1
Added Files:
	sql/backends/monet5/Tests/pyloader05.sql
	sql/backends/monet5/Tests/pyloader05.stable.err
	sql/backends/monet5/Tests/pyloader05.stable.out
Modified Files:
	monetdb5/extras/pyapi/convert_loops.h
	monetdb5/extras/pyapi/emit.c
	monetdb5/extras/pyapi/pyapi.c
	monetdb5/extras/pyapi/pyapi.h
	sql/backends/monet5/Tests/All
Branch: pythonloader
Log Message:
Fix importing string arrays and add testcase. diffs (truncated from 446 to 300 lines): diff --git a/monetdb5/extras/pyapi/convert_loops.h b/monetdb5/extras/pyapi/convert_loops.h --- a/monetdb5/extras/pyapi/convert_loops.h +++ b/monetdb5/extras/pyapi/convert_loops.h @@ -176,7 +176,7 @@ if (mask[index_offset * ret->count + iu] == TRUE) \ { \ bat->T->nil = 1; \ - BUNappend(b, str_nil, FALSE); \ + BUNappend(bat, str_nil, FALSE); \ } \ else \ { \ @@ -215,6 +215,84 @@ } \ bat->T->nonil = 1 - bat->T->nil; } \ +#define NP_INSERT_STRING_BAT(b) \ + switch(ret->result_type) \ + { \ + case NPY_BOOL: NP_COL_BAT_STR_LOOP(b, bit, "%hhd"); break; \ + case NPY_BYTE: NP_COL_BAT_STR_LOOP(b, bte, "%hhd"); break; \ + case NPY_SHORT: NP_COL_BAT_STR_LOOP(b, sht, "%hd"); break; \ + case NPY_INT: NP_COL_BAT_STR_LOOP(b, int, "%d"); break; \ + case NPY_LONG: NP_COL_BAT_STR_LOOP(b, long, "%ld"); break; \ + case NPY_LONGLONG: NP_COL_BAT_STR_LOOP(b, lng, LLFMT); break; \ + case NPY_UBYTE: NP_COL_BAT_STR_LOOP(b, unsigned char, "%hhu"); break; \ + case NPY_USHORT: NP_COL_BAT_STR_LOOP(b, unsigned short, "%hu"); break; \ + case NPY_UINT: NP_COL_BAT_STR_LOOP(b, unsigned int, "%u"); break; \ + case NPY_ULONG: NP_COL_BAT_STR_LOOP(b, unsigned long, "%lu"); break; \ + case NPY_ULONGLONG: NP_COL_BAT_STR_LOOP(b, unsigned long long, ULLFMT); break; \ + case NPY_FLOAT16: \ + case NPY_FLOAT: NP_COL_BAT_STR_LOOP(b, flt, "%f"); break; \ + case NPY_DOUBLE: \ + case NPY_LONGDOUBLE: NP_COL_BAT_STR_LOOP(b, dbl, "%lf"); break; \ + case NPY_STRING: \ + for (iu = 0; iu < ret->count; iu++) { \ + if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \ + b->T->nil = 1; \ + BUNappend(b, str_nil, FALSE); \ + } else { \ + if (!string_copy(&data[(index_offset * ret->count + iu) * ret->memory_size], utf8_string, ret->memory_size, true)) { \ + msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. 
Please return a regular ASCII string, or a Numpy_Unicode object.\n"); \ + goto wrapup; \ + } \ + BUNappend(b, utf8_string, FALSE); \ + } \ + } \ + break; \ + case NPY_UNICODE: \ + for (iu = 0; iu < ret->count; iu++) { \ + if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \ + b->T->nil = 1; \ + BUNappend(b, str_nil, FALSE); \ + } else { \ + utf32_to_utf8(0, ret->memory_size / 4, utf8_string, (const Py_UNICODE*)(&data[(index_offset * ret->count + iu) * ret->memory_size])); \ + BUNappend(b, utf8_string, FALSE); \ + } \ + } \ + break; \ + case NPY_OBJECT: \ + { \ + /* The resulting array is an array of pointers to various python objects */ \ + /* Because the python objects can be of any size, we need to allocate a different size utf8_string for every object */ \ + /* we will first loop over all the objects to get the maximum size needed, so we only need to do one allocation */ \ + size_t utf8_size = utf8string_minlength; \ + for (iu = 0; iu < ret->count; iu++) { \ + size_t size = utf8string_minlength; \ + PyObject *obj; \ + if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) continue; \ + obj = *((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]); \ + size = pyobject_get_size(obj); \ + if (size > utf8_size) utf8_size = size; \ + } \ + utf8_string = GDKzalloc(utf8_size); \ + for (iu = 0; iu < ret->count; iu++) { \ + if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \ + b->T->nil = 1; \ + BUNappend(b, str_nil, FALSE); \ + } else { \ + /* we try to handle as many types as possible */ \ + pyobject_to_str(((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]), utf8_size, &utf8_string); \ + BUNappend(b, utf8_string, FALSE); \ + } \ + } \ + break; \ + } \ + default: \ + msg = createException(MAL, "pyapi.eval", "Unrecognized type. 
Could not convert to NPY_UNICODE.\n"); \ + goto wrapup; \ + } \ + b->T->nonil = 1 - b->T->nil; \ + \ + + #ifdef HAVE_HGE #define NOT_HGE(mtpe) TYPE_##mtpe != TYPE_hge #else diff --git a/monetdb5/extras/pyapi/emit.c b/monetdb5/extras/pyapi/emit.c --- a/monetdb5/extras/pyapi/emit.c +++ b/monetdb5/extras/pyapi/emit.c @@ -4,6 +4,7 @@ #include "interprocess.h" #include "convert_loops.h" +#include "unicode.h" #if PY_MAJOR_VERSION >= 3 #define IS_PY3K @@ -202,6 +203,16 @@ loop_end: break; #endif case TYPE_str: + { + char *utf8_string = NULL; + if (ret->result_type != NPY_OBJECT) { + utf8_string = GDKzalloc(utf8string_minlength + ret->memory_size + 1); + utf8_string[utf8string_minlength + ret->memory_size] = '\0'; + } + NP_INSERT_STRING_BAT(self->cols[i].b); + if (utf8_string) GDKfree(utf8_string); + } + break; default: PyErr_Format(PyExc_TypeError, "Unsupported BAT Type %s", BatType_Format(self->cols[i].b->T->type)); return NULL; diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -51,8 +51,6 @@ bool option_warning; static PyObject *marshal_module = NULL; PyObject *marshal_loads = NULL; -const int utf8string_minlength = 256; - int PyAPIEnabled(void) { return (GDKgetenv_istrue(pyapi_enableflag) || GDKgetenv_isyes(pyapi_enableflag)); @@ -2212,82 +2210,8 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret BATseqbase(b, seqbase); b->T->nil = 0; b->T->nonil = 1; b->tkey = 0; b->tsorted = 0; b->trevsorted = 0; VERBOSE_MESSAGE("- Collecting return values of type %s.\n", PyType_Format(ret->result_type)); - switch(ret->result_type) - { - case NPY_BOOL: NP_COL_BAT_STR_LOOP(b, bit, "%hhd"); break; - case NPY_BYTE: NP_COL_BAT_STR_LOOP(b, bte, "%hhd"); break; - case NPY_SHORT: NP_COL_BAT_STR_LOOP(b, sht, "%hd"); break; - case NPY_INT: NP_COL_BAT_STR_LOOP(b, int, "%d"); break; - case NPY_LONG: NP_COL_BAT_STR_LOOP(b, long, "%ld"); break; - case NPY_LONGLONG: NP_COL_BAT_STR_LOOP(b, lng, LLFMT); 
break; - case NPY_UBYTE: NP_COL_BAT_STR_LOOP(b, unsigned char, "%hhu"); break; - case NPY_USHORT: NP_COL_BAT_STR_LOOP(b, unsigned short, "%hu"); break; - case NPY_UINT: NP_COL_BAT_STR_LOOP(b, unsigned int, "%u"); break; - case NPY_ULONG: NP_COL_BAT_STR_LOOP(b, unsigned long, "%lu"); break; - case NPY_ULONGLONG: NP_COL_BAT_STR_LOOP(b, unsigned long long, ULLFMT); break; - case NPY_FLOAT16: - case NPY_FLOAT: NP_COL_BAT_STR_LOOP(b, flt, "%f"); break; - case NPY_DOUBLE: - case NPY_LONGDOUBLE: NP_COL_BAT_STR_LOOP(b, dbl, "%lf"); break; - case NPY_STRING: - for (iu = 0; iu < ret->count; iu++) { - if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { - b->T->nil = 1; - BUNappend(b, str_nil, FALSE); - } else { - if (!string_copy(&data[(index_offset * ret->count + iu) * ret->memory_size], utf8_string, ret->memory_size, true)) { - msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n"); - goto wrapup; - } - BUNappend(b, utf8_string, FALSE); - } - } - break; - case NPY_UNICODE: - for (iu = 0; iu < ret->count; iu++) { - if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { - b->T->nil = 1; - BUNappend(b, str_nil, FALSE); - } else { - utf32_to_utf8(0, ret->memory_size / 4, utf8_string, (const Py_UNICODE*)(&data[(index_offset * ret->count + iu) * ret->memory_size])); - BUNappend(b, utf8_string, FALSE); - } - } - break; - case NPY_OBJECT: - { - //The resulting array is an array of pointers to various python objects - //Because the python objects can be of any size, we need to allocate a different size utf8_string for every object - //we will first loop over all the objects to get the maximum size needed, so we only need to do one allocation - size_t utf8_size = utf8string_minlength; - for (iu = 0; iu < ret->count; iu++) { - size_t size = utf8string_minlength; - PyObject *obj; - if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) continue; - obj = 
*((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]); - size = pyobject_get_size(obj); - if (size > utf8_size) utf8_size = size; - } - utf8_string = GDKzalloc(utf8_size); - for (iu = 0; iu < ret->count; iu++) { - if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { - b->T->nil = 1; - BUNappend(b, str_nil, FALSE); - } else { - //we try to handle as many types as possible - pyobject_to_str(((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]), utf8_size, &utf8_string); - BUNappend(b, utf8_string, FALSE); - } - } - break; - } - default: - msg = createException(MAL, "pyapi.eval", "Unrecognized type. Could not convert to NPY_UNICODE.\n"); - goto wrapup; - } - GDKfree(utf8_string); - - b->T->nonil = 1 - b->T->nil; + NP_INSERT_STRING_BAT(b); + if (utf8_string) GDKfree(utf8_string); BATsetcount(b, (BUN) ret->count); BATsettrivprop(b); break; diff --git a/monetdb5/extras/pyapi/pyapi.h b/monetdb5/extras/pyapi/pyapi.h --- a/monetdb5/extras/pyapi/pyapi.h +++ b/monetdb5/extras/pyapi/pyapi.h @@ -120,6 +120,6 @@ str _loader_init(void); pyapi_export char *PyError_CreateException(char *error_text, char *pycall); #define pyapi_enableflag "embedded_py" - +#define utf8string_minlength 256 #endif /* _PYPI_LIB_ */ diff --git a/sql/backends/monet5/Tests/All b/sql/backends/monet5/Tests/All --- a/sql/backends/monet5/Tests/All +++ b/sql/backends/monet5/Tests/All @@ -55,6 +55,7 @@ HAVE_LIBPY?pyloader01 HAVE_LIBPY?pyloader02 HAVE_LIBPY?pyloader03 HAVE_LIBPY?pyloader04 +HAVE_LIBPY?pyloader05 # should this work? 
diff --git a/sql/backends/monet5/Tests/pyloader05.sql b/sql/backends/monet5/Tests/pyloader05.sql new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/Tests/pyloader05.sql @@ -0,0 +1,26 @@ + +# test string returns +START TRANSACTION; +CREATE TABLE pyloader05table(s STRING); +CREATE LOADER pyloader05() LANGUAGE PYTHON { + _emit.emit({'s': 33}); + _emit.emit({'s': 42.0}); + _emit.emit({'s': 'hello'}); + _emit.emit({'s': u'\u00D6'}); # \u00D6 = O + umlaut + _emit.emit({'s': [33, 'hello']}); + _emit.emit({'s': [42.0, 33]}); + _emit.emit({'s': numpy.array(['hello', 'hello', 'hello'])}); + _emit.emit({'s': [u'\u00D6', 'hello', 33]}); + _emit.emit({'s': numpy.array([u'\u00D6', 'hello', 33])}); + _emit.emit({'s': numpy.arange(3).astype(numpy.float32)}); + _emit.emit({'s': numpy.arange(3).astype(numpy.float64)}); + _emit.emit({'s': numpy.arange(3).astype(numpy.int8)}); + _emit.emit({'s': numpy.arange(3).astype(numpy.int16)}); + _emit.emit({'s': numpy.arange(3).astype(numpy.int32)}); + _emit.emit({'s': numpy.arange(3).astype(numpy.int64)}); +}; +COPY INTO pyloader05table FROM LOADER pyloader05(); +SELECT * FROM pyloader05table; +DROP TABLE pyloader05table; +DROP LOADER pyloader05; +ROLLBACK; diff --git a/sql/backends/monet5/Tests/pyloader05.stable.err b/sql/backends/monet5/Tests/pyloader05.stable.err new file mode 100644 --- /dev/null +++ b/sql/backends/monet5/Tests/pyloader05.stable.err @@ -0,0 +1,36 @@ +stderr of test 'pyloader05` in directory 'sql/backends/monet5` itself: + + +# 13:44:57 > +# 13:44:57 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=34686" "--set" "mapi_usock=/var/tmp/mtest-17327/.s.monetdb.34686" "--set" "monet_prompt=" "--forcemito" "--dbpath=/home/mytherin/opt/var/MonetDB/mTests_sql_backends_monet5" "--set" "embedded_r=yes" "--set" "embedded_py=true" +# 13:44:57 > + +# builtin opt gdk_dbpath = /home/mytherin/opt/var/monetdb5/dbfarm/demo +# builtin opt gdk_debug = 0 +# builtin opt gdk_vmtrim = no +# 
builtin opt monet_prompt = >
+# builtin opt monet_daemon = no
+# builtin opt mapi_port = 50000
+# builtin opt mapi_open = false
+# builtin opt mapi_autosense = false
+# builtin opt sql_optimizer = default_pipe
+# builtin opt sql_debug = 0
+# cmdline opt gdk_nr_threads = 0
+# cmdline opt mapi_open = true
+# cmdline opt mapi_port = 34686
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list