Changeset: 77ad4b6244eb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=77ad4b6244eb
Modified Files:
        configure.ag
        monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
        monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
        monetdb5/extras/pyapi/connection.c
        monetdb5/extras/pyapi/formatinput.c
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/pytypes.c
        monetdb5/extras/pyapi/type_conversion.c
        monetdb5/extras/pyapi/type_conversion.h
        monetdb5/extras/pyapi/unicode.h
        sql/backends/monet5/Tests/pyapi00.sql
        sql/backends/monet5/Tests/pyapi09.sql
        sql/backends/monet5/Tests/pyapi14.sql
        sql/backends/monet5/Tests/pyapi16.sql
        sql/backends/monet5/Tests/pyapi21.sql
        sql/backends/monet5/Tests/pyapi24.sql
Branch: pythonudf
Log Message:

Added experimental Python3 support.

Two new configure options let you select the Python interpreter and the
python-config script that are used to build and link the Python integration.

--with-pyversion=FILE specifies the Python interpreter (e.g. /usr/bin/python3
for Python 3)
--with-pyconfig=FILE specifies the python-config script (e.g.
/usr/bin/python3-config for Python 3)

Note that Python 3 support is not complete: loading marshalled code objects is
disabled (tests pyapi08, pyapi14), and returning 'bytes' objects triggers an
assertion (tests pyapi09, pyapi21).

In the future, we might want to have separate languages for Python2 and Python3 
(e.g. LANGUAGE PYTHON, LANGUAGE PYTHON3), just like Postgres.


diffs (truncated from 743 to 300 lines):

diff --git a/configure.ag b/configure.ag
--- a/configure.ag
+++ b/configure.ag
@@ -1235,6 +1235,14 @@ case "$have_python3" in
                ;;
 esac
 
+AC_ARG_WITH(pyconfig,
+       AS_HELP_STRING([--with-pyconfig=FILE], [python-config is installed as 
FILE]),
+       have_pyconfig="$withval")
+
+AC_ARG_WITH(pyversion,
+       AS_HELP_STRING([--with-pyversion=FILE], [python is installed as FILE]),
+       have_pyversion="$withval")
+
 dnl Figure out a default for PYTHON2 or PYTHON3
 AC_PATH_PROG(PYTHON,python,no,$PATH)
 PYTHON_MAJ=unknown
@@ -2305,24 +2313,33 @@ if test "x$enable_pyintegration" != xno;
                AC_MSG_ERROR([--enable-pyintegration value must be 
yes|no|auto|absolute path of python-config])
                ;;
        esac
-       AC_PATH_PROG(PYCMD,python-config,,$XPATH)
-       if test "x$PYCMD" = x; then
-               if test "x$enable_pyintegration" = xyes; then
-                       AC_MSG_ERROR([python-config library required for Python 
integration support])
-               else
-                       have_libpy="no"
-                       why_have_libpy="(python-config command not found)"
-                       enable_pyintegration=no
-                       disable_pyintegration="(python-config command not 
found)"
+       if test "x$have_pyconfig" = x; then
+               AC_PATH_PROG(PYCMD,python-config,,$XPATH)
+               if test "x$PYCMD" = x; then
+                       if test "x$enable_pyintegration" = xyes; then
+                               AC_MSG_ERROR([python-config library required 
for Python integration support])
+                       else
+                               have_libpy="no"
+                               why_have_libpy="(python-config command not 
found)"
+                               enable_pyintegration=no
+                               disable_pyintegration="(python-config command 
not found)"
+                       fi
                fi
+       else
+               PYCMD="$have_pyconfig"
        fi
-       PYTHON_CMD=`$PYCMD --exec-prefix`/bin/python
-       NUMPYVER=`$PYTHON_CMD -c "import numpy; print 
numpy.__version__.split('.').__getitem__(1) >= 7"`
+
+       if test "x$have_pyversion" = x; then
+               PYTHON_CMD=$PYTHON2
+       else
+               PYTHON_CMD=$have_pyversion
+       fi
+       NUMPYVER=`$PYTHON_CMD -c "import numpy; 
print(int(numpy.__version__.split('.').__getitem__(1)) >= 7)"`
        #check numpyconfig.h because autoconf tests includes by compiling a 
small C program, and other numpy headers do not compile without Python.h
        AC_CHECK_HEADER(
            [numpy/numpyconfig.h], 
                [NUMPYHEADERS=True],
-               [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os; print 
os.path.isfile(os.path.join(numpy.get_include(), 'numpy/arrayobject.h'))"`]
+               [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os; 
print(os.path.isfile(os.path.join(numpy.get_include(), 
'numpy/arrayobject.h')))"`]
                )
        if [test "x$NUMPYVER" = x] || [test "x$NUMPYVER" = xFalse]; then
                if test "x$enable_pyintegration" = xyes; then
@@ -2342,7 +2359,9 @@ if test "x$enable_pyintegration" != xno;
                        enable_pyintegration=no
                        disable_pyintegration="(numpy/arrayobject.h not found)"
                fi
-       elif libpy_CFLAGS=`\`$PYCMD --exec-prefix\`/bin/python -c "from 
distutils.sysconfig import get_python_inc; import numpy; print ' -I' + 
get_python_inc() + ' -I' + numpy.get_include();"` && libpy_LIBS=`$PYCMD 
--ldflags`; then
+       else
+               libpy_CFLAGS=`$PYTHON_CMD -c "from distutils.sysconfig import 
get_python_inc; import numpy; print(' -I' + get_python_inc() + ' -I' + 
numpy.get_include());"`
+               libpy_LIBS=`$PYCMD --ldflags`
                have_libpy=yes
                AC_DEFINE(HAVE_LIBPY, 1, [Define if we can link to python])
                AC_SUBST(libpy_CFLAGS, $libpy_CFLAGS)
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC 
b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
@@ -73,7 +73,7 @@ bat.append(fib, 28:int);
 bat.append(fib, 29:int);
 
 # define a function within the main function and map it to a BAT
-s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, 
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn 
b\nreturn(map(fibonacci, arg1))", fib);
+s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, 
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn 
b\nreturn(list(map(fibonacci, arg1)))", fib);
 io.print(s);
 
 indices:= bat.new(:oid,:int);
@@ -231,5 +231,5 @@ bat.append(indices, 150:int);
 
 
 # now do it while returning a hge, to test returning very large numbers
-s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, 
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn 
b\nreturn(map(fibonacci, arg1))", indices);
+s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr == 
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0, 
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn 
b\nreturn(list(map(fibonacci, arg1)))", indices);
 io.print(s);
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC 
b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
@@ -110,6 +110,6 @@ bat.append(bint,846930886:int);
 bat.append(bint,67:int);
 bat.append(bint,124124124:int);
 rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bint);
-rint:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([map(int, arg1)])",rstr);
+rint:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([list(map(int, 
arg1))])",rstr);
 io.print(rint);
 
diff --git a/monetdb5/extras/pyapi/connection.c 
b/monetdb5/extras/pyapi/connection.c
--- a/monetdb5/extras/pyapi/connection.c
+++ b/monetdb5/extras/pyapi/connection.c
@@ -14,6 +14,12 @@
 #endif
 #include <numpy/arrayobject.h>
 
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_CheckExact PyUnicode_CheckExact
+#define PyString_FromString PyUnicode_FromString
+#endif
+
 static PyObject *
 _connection_execute(Py_ConnectionObject *self, PyObject *args)
 {
@@ -28,8 +34,14 @@ static PyObject *
         PyObject *result;
         res_table* output = NULL;
         char *res = NULL;
+        char *query;
+#ifndef IS_PY3K
+        query = ((PyStringObject*)args)->ob_sval;
+#else
+        query = PyUnicode_AsUTF8(args);
+#endif
 
-        res = _connection_query(self->cntxt, ((PyStringObject*)args)->ob_sval, 
&output);
+        res = _connection_query(self->cntxt, query, &output);
         if (res != MAL_SUCCEED) {
             PyErr_Format(PyExc_Exception, "SQL Query Failed: %s", (res ? res : 
"<no error>"));
             return NULL;
@@ -67,10 +79,16 @@ static PyObject *
     else 
 #ifdef HAVE_FORK
     {
+        char *query;
+#ifndef IS_PY3K
+        query = ((PyStringObject*)args)->ob_sval;
+#else
+        query = PyUnicode_AsUTF8(args);
+#endif
         // This is a mapped process, we do not want forked processes to touch 
the database
         // Only the main process may touch the database, so we ship the query 
back to the main process
         // copy the query into shared memory and tell the main process there 
is a query to handle
-        strncpy(self->query_ptr->query, ((PyStringObject*)args)->ob_sval, 
8192);
+        strncpy(self->query_ptr->query, query, 8192);
         self->query_ptr->pending_query = true;
         //free the main process so it can work on the query
         change_semaphore_value(self->query_sem, 0, 1);
@@ -213,6 +231,9 @@ PyTypeObject Py_ConnectionType = {
     0, 
     0,
     0
+#ifdef IS_PY3K
+    ,0
+#endif
 };
 
 void _connection_cleanup_result(void* output) 
diff --git a/monetdb5/extras/pyapi/formatinput.c 
b/monetdb5/extras/pyapi/formatinput.c
--- a/monetdb5/extras/pyapi/formatinput.c
+++ b/monetdb5/extras/pyapi/formatinput.c
@@ -10,9 +10,18 @@
 #include "gdk.h"
 #include "mal_exception.h"
 
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_FromStringAndSize PyUnicode_FromStringAndSize
+#endif
+
 const size_t additional_argcount = 3;
 const char * additional_args[] = {"_columns", "_column_types", "_conn"};
 
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#endif
+
 //! Parse a PyCodeObject from a string, the string is expected to be in the 
format {@<encoded_function>};, where <encoded_function> is the Marshalled code 
object
 PyObject *PyCodeObject_ParseString(char *string, char **msg);
 PyObject *PyCodeObject_ParseString(char *string, char **msg)
@@ -94,10 +103,14 @@ char* FormatCode(char* code, char **args
     char base_start[] = "def pyfun(";
     char base_end[] = "):\n";
     *msg = NULL;
+#ifndef IS_PY3K
     if (code[1] == '@') {
         *code_object = PyCodeObject_ParseString(code, msg);
         return NULL;
     }
+#else
+    (void) code_object;
+#endif
 
     indentation_levels = (size_t*)GDKzalloc(max_indentation * sizeof(size_t));
     statements_per_level = (size_t*)GDKzalloc(max_indentation * 
sizeof(size_t));
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -46,6 +46,21 @@
 #include <sys/wait.h>
 #endif
 
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_FromString PyUnicode_FromString
+#define PyString_Check PyUnicode_Check
+#define PyString_CheckExact PyUnicode_CheckExact
+#define PyString_AsString PyUnicode_AsUTF8
+#define PyString_AS_STRING PyUnicode_AsUTF8
+#define PyInt_FromLong PyLong_FromLong
+#define PyInt_Check PyLong_Check
+#define PythonUnicodeType char
+#else
+#define PythonUnicodeType Py_UNICODE
+
+#endif
+
 const char* pyapi_enableflag = "embedded_py";
 const char* verbose_enableflag = "enable_pyverbose";
 const char* warning_enableflag = "enable_pywarnings";
@@ -318,7 +333,7 @@ Array of type %s no copying will be need
                 case NPY_DOUBLE:                                               
                                                                                
\
                 case NPY_LONGDOUBLE: NP_COL_BAT_LOOP(bat, mtpe, dbl); break;   
                                                                                
\
                 case NPY_STRING:     NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
str_to_##mtpe, char); break;                                                    
                \
-                case NPY_UNICODE:    NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
unicode_to_##mtpe, Py_UNICODE); break;                                          
                      \
+                case NPY_UNICODE:    NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
unicode_to_##mtpe, PythonUnicodeType); break;                                   
                             \
                 case NPY_OBJECT:     NP_COL_BAT_LOOP_FUNC(bat, mtpe, 
pyobject_to_##mtpe, PyObject*); break;                                          
                     \
                 default:                                                       
                                                                                
\
                     msg = createException(MAL, "pyapi.eval", "Unrecognized 
type. Could not convert to %s.\n", BatType_Format(TYPE_##mtpe));                
    \
@@ -1265,7 +1280,11 @@ aggrwrapup:
                         msg = createException(MAL, "pyapi.eval", "Expected a 
string key in the dictionary, but received an object of type %s", 
colname->ob_type->tp_name);
                         goto wrapup;
                     }
+#ifndef IS_PY3K
                     retnames[i] = ((PyStringObject*)colname)->ob_sval;
+#else
+                    retnames[i] = PyUnicode_AsUTF8(colname);
+#endif
                 }
             }
             pResult = PyDict_CheckForConversion(pResult, retcols, retnames, 
&msg);
@@ -1614,7 +1633,11 @@ str
 bool PyType_IsPyScalar(PyObject *object)
 {
     if (object == NULL) return false;
-    return (PyArray_CheckScalar(object) || PyInt_Check(object) || 
PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) || 
PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object));
+    return (PyArray_CheckScalar(object) || PyInt_Check(object) || 
PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) || 
PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object)
+#ifdef IS_PY3K   
+        || PyBytes_Check(object)
+#endif
+        );
 }
 
 
@@ -2417,7 +2440,7 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret
                             b->T->nil = 1;
                             BUNappend(b, str_nil, FALSE);
                         }  else {
-                            if (!string_copy(&data[(index_offset * ret->count 
+ iu) * ret->memory_size], utf8_string, ret->memory_size)) {
+                            if (!string_copy(&data[(index_offset * ret->count 
+ iu) * ret->memory_size], utf8_string, ret->memory_size, true)) {
                                 msg = createException(MAL, "pyapi.eval", 
"Invalid string encoding used. Please return a regular ASCII string, or a 
Numpy_Unicode object.\n");
                                 goto wrapup;
                             }
@@ -2462,21 +2485,32 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret
                         } else {
                             //we try to handle as many types as possible
                             PyObject *obj = *((PyObject**) &data[(index_offset 
* ret->count + iu) * ret->memory_size]);
+#ifndef IS_PY3K             
                             if (PyString_CheckExact(obj)) {
                                 char *str = ((PyStringObject*)obj)->ob_sval;
-                                if (!string_copy(str, utf8_string, strlen(str) 
+ 1)) {
+                                if (!string_copy(str, utf8_string, strlen(str) 
+ 1, false)) {
                                     msg = createException(MAL, "pyapi.eval", 
"Invalid string encoding used. Please return a regular ASCII string, or a 
Numpy_Unicode object.\n");
                                     goto wrapup;
                                 }
-                            } else if (PyByteArray_CheckExact(obj)) {
+                            } else 
+#endif
+                            if (PyByteArray_CheckExact(obj)) {
                                 char *str = 
((PyByteArrayObject*)obj)->ob_bytes;
-                                if (!string_copy(str, utf8_string, strlen(str) 
+ 1)) {
+                                if (!string_copy(str, utf8_string, strlen(str) 
+ 1, false)) {
                                     msg = createException(MAL, "pyapi.eval", 
"Invalid string encoding used. Please return a regular ASCII string, or a 
Numpy_Unicode object.\n");
                                     goto wrapup;
                                 }
                             } else if (PyUnicode_CheckExact(obj)) {
+#ifndef IS_PY3K
                                 Py_UNICODE *str = 
(Py_UNICODE*)((PyUnicodeObject*)obj)->str;
                                 utf32_to_utf8(0, 
((PyUnicodeObject*)obj)->length, utf8_string, str);
+#else
+                                char *str = PyUnicode_AsUTF8(obj);
+                                if (!string_copy(str, utf8_string, strlen(str) 
+ 1, true)) {
+                                    msg = createException(MAL, "pyapi.eval", 
"Invalid string encoding used. Please return a regular ASCII string, or a 
Numpy_Unicode object.\n");
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to