Changeset: 77ad4b6244eb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=77ad4b6244eb Modified Files: configure.ag monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC monetdb5/extras/pyapi/connection.c monetdb5/extras/pyapi/formatinput.c monetdb5/extras/pyapi/pyapi.c monetdb5/extras/pyapi/pytypes.c monetdb5/extras/pyapi/type_conversion.c monetdb5/extras/pyapi/type_conversion.h monetdb5/extras/pyapi/unicode.h sql/backends/monet5/Tests/pyapi00.sql sql/backends/monet5/Tests/pyapi09.sql sql/backends/monet5/Tests/pyapi14.sql sql/backends/monet5/Tests/pyapi16.sql sql/backends/monet5/Tests/pyapi21.sql sql/backends/monet5/Tests/pyapi24.sql Branch: pythonudf Log Message:
Added experimental Python3 support. Two new configure options allow you to select the Python executable and the python-config to link to: --with-pyversion=FILE specifies the Python executable (e.g. /usr/bin/python3 for Python3); --with-pyconfig=FILE specifies the python-config (e.g. /usr/bin/python3-config for Python3). Note that Python3 support is not complete: loading marshalled code objects is disabled (Tests: pyapi08, pyapi14) and returning 'bytes' objects triggers an assertion (Tests: pyapi09, pyapi21). In the future, we might want to have separate languages for Python2 and Python3 (e.g. LANGUAGE PYTHON, LANGUAGE PYTHON3), just like Postgres. diffs (truncated from 743 to 300 lines): diff --git a/configure.ag b/configure.ag --- a/configure.ag +++ b/configure.ag @@ -1235,6 +1235,14 @@ case "$have_python3" in ;; esac +AC_ARG_WITH(pyconfig, + AS_HELP_STRING([--with-pyconfig=FILE], [python-config is installed as FILE]), + have_pyconfig="$withval") + +AC_ARG_WITH(pyversion, + AS_HELP_STRING([--with-pyversion=FILE], [python is installed as FILE]), + have_pyversion="$withval") + dnl Figure out a default for PYTHON2 or PYTHON3 AC_PATH_PROG(PYTHON,python,no,$PATH) PYTHON_MAJ=unknown @@ -2305,24 +2313,33 @@ if test "x$enable_pyintegration" != xno; AC_MSG_ERROR([--enable-pyintegration value must be yes|no|auto|absolute path of python-config]) ;; esac - AC_PATH_PROG(PYCMD,python-config,,$XPATH) - if test "x$PYCMD" = x; then - if test "x$enable_pyintegration" = xyes; then - AC_MSG_ERROR([python-config library required for Python integration support]) - else - have_libpy="no" - why_have_libpy="(python-config command not found)" - enable_pyintegration=no - disable_pyintegration="(python-config command not found)" + if test "x$have_pyconfig" = x; then + AC_PATH_PROG(PYCMD,python-config,,$XPATH) + if test "x$PYCMD" = x; then + if test "x$enable_pyintegration" = xyes; then + AC_MSG_ERROR([python-config library required for Python integration support]) + else + have_libpy="no" + 
why_have_libpy="(python-config command not found)" + enable_pyintegration=no + disable_pyintegration="(python-config command not found)" + fi fi + else + PYCMD="$have_pyconfig" fi - PYTHON_CMD=`$PYCMD --exec-prefix`/bin/python - NUMPYVER=`$PYTHON_CMD -c "import numpy; print numpy.__version__.split('.').__getitem__(1) >= 7"` + + if test "x$have_pyversion" = x; then + PYTHON_CMD=$PYTHON2 + else + PYTHON_CMD=$have_pyversion + fi + NUMPYVER=`$PYTHON_CMD -c "import numpy; print(int(numpy.__version__.split('.').__getitem__(1)) >= 7)"` #check numpyconfig.h because autoconf tests includes by compiling a small C program, and other numpy headers do not compile without Python.h AC_CHECK_HEADER( [numpy/numpyconfig.h], [NUMPYHEADERS=True], - [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os; print os.path.isfile(os.path.join(numpy.get_include(), 'numpy/arrayobject.h'))"`] + [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os; print(os.path.isfile(os.path.join(numpy.get_include(), 'numpy/arrayobject.h')))"`] ) if [test "x$NUMPYVER" = x] || [test "x$NUMPYVER" = xFalse]; then if test "x$enable_pyintegration" = xyes; then @@ -2342,7 +2359,9 @@ if test "x$enable_pyintegration" != xno; enable_pyintegration=no disable_pyintegration="(numpy/arrayobject.h not found)" fi - elif libpy_CFLAGS=`\`$PYCMD --exec-prefix\`/bin/python -c "from distutils.sysconfig import get_python_inc; import numpy; print ' -I' + get_python_inc() + ' -I' + numpy.get_include();"` && libpy_LIBS=`$PYCMD --ldflags`; then + else + libpy_CFLAGS=`$PYTHON_CMD -c "from distutils.sysconfig import get_python_inc; import numpy; print(' -I' + get_python_inc() + ' -I' + numpy.get_include());"` + libpy_LIBS=`$PYCMD --ldflags` have_libpy=yes AC_DEFINE(HAVE_LIBPY, 1, [Define if we can link to python]) AC_SUBST(libpy_CFLAGS, $libpy_CFLAGS) diff --git a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC --- 
a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC @@ -73,7 +73,7 @@ bat.append(fib, 28:int); bat.append(fib, 29:int); # define a function within the main function and map it to a BAT -s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn b\nreturn(map(fibonacci, arg1))", fib); +s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn b\nreturn(list(map(fibonacci, arg1)))", fib); io.print(s); indices:= bat.new(:oid,:int); @@ -231,5 +231,5 @@ bat.append(indices, 150:int); # now do it while returning a hge, to test returning very large numbers -s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn b\nreturn(map(fibonacci, arg1))", indices); +s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn b\nreturn(list(map(fibonacci, arg1)))", indices); io.print(s); diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC --- a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC @@ -110,6 +110,6 @@ bat.append(bint,846930886:int); bat.append(bint,67:int); bat.append(bint,124124124:int); rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bint); -rint:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([map(int, arg1)])",rstr); +rint:bat[:oid,:int] := 
pyapi.eval(nil:ptr,"return([list(map(int, arg1))])",rstr); io.print(rint); diff --git a/monetdb5/extras/pyapi/connection.c b/monetdb5/extras/pyapi/connection.c --- a/monetdb5/extras/pyapi/connection.c +++ b/monetdb5/extras/pyapi/connection.c @@ -14,6 +14,12 @@ #endif #include <numpy/arrayobject.h> +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#define PyString_CheckExact PyUnicode_CheckExact +#define PyString_FromString PyUnicode_FromString +#endif + static PyObject * _connection_execute(Py_ConnectionObject *self, PyObject *args) { @@ -28,8 +34,14 @@ static PyObject * PyObject *result; res_table* output = NULL; char *res = NULL; + char *query; +#ifndef IS_PY3K + query = ((PyStringObject*)args)->ob_sval; +#else + query = PyUnicode_AsUTF8(args); +#endif - res = _connection_query(self->cntxt, ((PyStringObject*)args)->ob_sval, &output); + res = _connection_query(self->cntxt, query, &output); if (res != MAL_SUCCEED) { PyErr_Format(PyExc_Exception, "SQL Query Failed: %s", (res ? res : "<no error>")); return NULL; @@ -67,10 +79,16 @@ static PyObject * else #ifdef HAVE_FORK { + char *query; +#ifndef IS_PY3K + query = ((PyStringObject*)args)->ob_sval; +#else + query = PyUnicode_AsUTF8(args); +#endif // This is a mapped process, we do not want forked processes to touch the database // Only the main process may touch the database, so we ship the query back to the main process // copy the query into shared memory and tell the main process there is a query to handle - strncpy(self->query_ptr->query, ((PyStringObject*)args)->ob_sval, 8192); + strncpy(self->query_ptr->query, query, 8192); self->query_ptr->pending_query = true; //free the main process so it can work on the query change_semaphore_value(self->query_sem, 0, 1); @@ -213,6 +231,9 @@ PyTypeObject Py_ConnectionType = { 0, 0, 0 +#ifdef IS_PY3K + ,0 +#endif }; void _connection_cleanup_result(void* output) diff --git a/monetdb5/extras/pyapi/formatinput.c b/monetdb5/extras/pyapi/formatinput.c --- 
a/monetdb5/extras/pyapi/formatinput.c +++ b/monetdb5/extras/pyapi/formatinput.c @@ -10,9 +10,18 @@ #include "gdk.h" #include "mal_exception.h" +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#define PyString_FromStringAndSize PyUnicode_FromStringAndSize +#endif + const size_t additional_argcount = 3; const char * additional_args[] = {"_columns", "_column_types", "_conn"}; +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#endif + //! Parse a PyCodeObject from a string, the string is expected to be in the format {@<encoded_function>};, where <encoded_function> is the Marshalled code object PyObject *PyCodeObject_ParseString(char *string, char **msg); PyObject *PyCodeObject_ParseString(char *string, char **msg) @@ -94,10 +103,14 @@ char* FormatCode(char* code, char **args char base_start[] = "def pyfun("; char base_end[] = "):\n"; *msg = NULL; +#ifndef IS_PY3K if (code[1] == '@') { *code_object = PyCodeObject_ParseString(code, msg); return NULL; } +#else + (void) code_object; +#endif indentation_levels = (size_t*)GDKzalloc(max_indentation * sizeof(size_t)); statements_per_level = (size_t*)GDKzalloc(max_indentation * sizeof(size_t)); diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -46,6 +46,21 @@ #include <sys/wait.h> #endif +#if PY_MAJOR_VERSION >= 3 +#define IS_PY3K +#define PyString_FromString PyUnicode_FromString +#define PyString_Check PyUnicode_Check +#define PyString_CheckExact PyUnicode_CheckExact +#define PyString_AsString PyUnicode_AsUTF8 +#define PyString_AS_STRING PyUnicode_AsUTF8 +#define PyInt_FromLong PyLong_FromLong +#define PyInt_Check PyLong_Check +#define PythonUnicodeType char +#else +#define PythonUnicodeType Py_UNICODE + +#endif + const char* pyapi_enableflag = "embedded_py"; const char* verbose_enableflag = "enable_pyverbose"; const char* warning_enableflag = "enable_pywarnings"; @@ -318,7 +333,7 @@ Array of type %s no copying will be need case 
NPY_DOUBLE: \ case NPY_LONGDOUBLE: NP_COL_BAT_LOOP(bat, mtpe, dbl); break; \ case NPY_STRING: NP_COL_BAT_LOOP_FUNC(bat, mtpe, str_to_##mtpe, char); break; \ - case NPY_UNICODE: NP_COL_BAT_LOOP_FUNC(bat, mtpe, unicode_to_##mtpe, Py_UNICODE); break; \ + case NPY_UNICODE: NP_COL_BAT_LOOP_FUNC(bat, mtpe, unicode_to_##mtpe, PythonUnicodeType); break; \ case NPY_OBJECT: NP_COL_BAT_LOOP_FUNC(bat, mtpe, pyobject_to_##mtpe, PyObject*); break; \ default: \ msg = createException(MAL, "pyapi.eval", "Unrecognized type. Could not convert to %s.\n", BatType_Format(TYPE_##mtpe)); \ @@ -1265,7 +1280,11 @@ aggrwrapup: msg = createException(MAL, "pyapi.eval", "Expected a string key in the dictionary, but received an object of type %s", colname->ob_type->tp_name); goto wrapup; } +#ifndef IS_PY3K retnames[i] = ((PyStringObject*)colname)->ob_sval; +#else + retnames[i] = PyUnicode_AsUTF8(colname); +#endif } } pResult = PyDict_CheckForConversion(pResult, retcols, retnames, &msg); @@ -1614,7 +1633,11 @@ str bool PyType_IsPyScalar(PyObject *object) { if (object == NULL) return false; - return (PyArray_CheckScalar(object) || PyInt_Check(object) || PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) || PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object)); + return (PyArray_CheckScalar(object) || PyInt_Check(object) || PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) || PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object) +#ifdef IS_PY3K + || PyBytes_Check(object) +#endif + ); } @@ -2417,7 +2440,7 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret b->T->nil = 1; BUNappend(b, str_nil, FALSE); } else { - if (!string_copy(&data[(index_offset * ret->count + iu) * ret->memory_size], utf8_string, ret->memory_size)) { + if (!string_copy(&data[(index_offset * ret->count + iu) * ret->memory_size], utf8_string, ret->memory_size, true)) { msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. 
Please return a regular ASCII string, or a Numpy_Unicode object.\n"); goto wrapup; } @@ -2462,21 +2485,32 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret } else { //we try to handle as many types as possible PyObject *obj = *((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]); +#ifndef IS_PY3K if (PyString_CheckExact(obj)) { char *str = ((PyStringObject*)obj)->ob_sval; - if (!string_copy(str, utf8_string, strlen(str) + 1)) { + if (!string_copy(str, utf8_string, strlen(str) + 1, false)) { msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n"); goto wrapup; } - } else if (PyByteArray_CheckExact(obj)) { + } else +#endif + if (PyByteArray_CheckExact(obj)) { char *str = ((PyByteArrayObject*)obj)->ob_bytes; - if (!string_copy(str, utf8_string, strlen(str) + 1)) { + if (!string_copy(str, utf8_string, strlen(str) + 1, false)) { msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n"); goto wrapup; } } else if (PyUnicode_CheckExact(obj)) { +#ifndef IS_PY3K Py_UNICODE *str = (Py_UNICODE*)((PyUnicodeObject*)obj)->str; utf32_to_utf8(0, ((PyUnicodeObject*)obj)->length, utf8_string, str); +#else + char *str = PyUnicode_AsUTF8(obj); + if (!string_copy(str, utf8_string, strlen(str) + 1, true)) { + msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n"); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list