Changeset: 837dec34b419 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=837dec34b419
Added Files:
	monetdb5/extras/pyapi/Tests/pyapi_modify_input.malC
	monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.err
	monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.out
	monetdb5/extras/pyapi/bytearray.c
	monetdb5/extras/pyapi/bytearray.h
Modified Files:
	monetdb5/extras/pyapi/Makefile.ag
	monetdb5/extras/pyapi/Tests/All
	monetdb5/extras/pyapi/Tests/pyapi_pandas.stable.err
	monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err
	monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err
	monetdb5/extras/pyapi/Tests/pyapi_types_numeric.stable.err
	monetdb5/extras/pyapi/pyapi.c
	monetdb5/extras/pyapi/type_conversion.c
	monetdb5/extras/pyapi/type_conversion.h
	sql/backends/monet5/Tests/pyapi08.sql
Branch: pyapi
Log Message:
Added the option of converting strings into PyByteArrayObjects instead of PyStringObjects, which passes a reference to the string rather than copying the entire string. diffs (truncated from 663 to 300 lines): diff --git a/monetdb5/extras/pyapi/Makefile.ag b/monetdb5/extras/pyapi/Makefile.ag --- a/monetdb5/extras/pyapi/Makefile.ag +++ b/monetdb5/extras/pyapi/Makefile.ag @@ -17,7 +17,7 @@ MTSAFE lib__pyapi = { MODULE DIR = libdir/monetdb5 - SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h + SOURCES = pyapi.c pyapi.h unicode.c unicode.h pytypes.c pytypes.h type_conversion.c type_conversion.h bytearray.c bytearray.h XDEPS = $(libpy_LIBDEP) LIBS = ../../tools/libmonetdb5 \ ../../../gdk/libbat \ diff --git a/monetdb5/extras/pyapi/Tests/All b/monetdb5/extras/pyapi/Tests/All --- a/monetdb5/extras/pyapi/Tests/All +++ b/monetdb5/extras/pyapi/Tests/All @@ -7,3 +7,4 @@ HAVE_LIBPY?pyapi_types_string HAVE_LIBPY?pyapi_numpy_boolean HAVE_LIBPY?pyapi_types_numeric HAVE_LIBPY?pyapi_numpy_numeric_nested +HAVE_LIBPY?pyapi_modify_input diff --git a/monetdb5/extras/pyapi/Tests/pyapi_modify_input.malC b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.malC new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.malC @@ -0,0 +1,34 @@ + +# a number of operations that might not work properly with byte arrays +# basic strings +bstr:= bat.new(:oid,:str); +bat.append(bstr,"asdf":str); +bat.append(bstr,"sd asd asd asd asd a":str); +bat.append(bstr,"1234":str); +bat.append(bstr,"test":str); + +# we are not allowed to change the value of the input BAT from within python code, check if this is possible with byte array operations +# setitem +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"arg1[0].__setitem__(0,'a')\nreturn(arg1)",bstr); +# subscript +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"arg1[0][0]='d'\nreturn(arg1)",bstr); +# reverse +rstr:bat[:oid,:str] := 
pyapi.eval(nil:ptr,"arg1[0].reverse()\nreturn(arg1)",bstr); +# replace +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"arg1[0].replace('a','d')\nreturn(arg1)",bstr); +# remove +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"a = arg1[0].remove('a')\nreturn(arg1)",bstr); +# delete +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"a = arg1[0]\ndel a\nreturn(arg1)",bstr); +# pop +rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"a = arg1[0].pop()\nreturn(arg1)",bstr); +io.print(bstr); + +#try the same with other types just to be sure +a:= bat.new(:oid,:int); +bat.append(a,1:int); +bat.append(a,2:int); +bat.append(a,3:int); +bat.append(a,4:int); +result:bat[:oid,:int] := pyapi.eval(nil:ptr, "arg1[0]=4\nreturn(arg1)", a); +io.print(a); diff --git a/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.err new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.err @@ -0,0 +1,58 @@ +stderr of test 'pyapi_modify_input` in directory 'monetdb5/extras/pyapi` itself: + + +# 12:27:28 > +# 12:27:28 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=31030" "--set" "mapi_usock=/var/tmp/mtest-30203/.s.monetdb.31030" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/mytherin/opt/var/mTests_monetdb5_extras_pyapi" "--set" "mal_listing=2" "--set" "embedded_r=true" "--set" "embedded_py=true" +# 12:27:28 > + +# builtin opt gdk_dbpath = /home/mytherin/opt/var/monetdb5/dbfarm/demo +# builtin opt gdk_debug = 0 +# builtin opt gdk_vmtrim = no +# builtin opt monet_prompt = > +# builtin opt monet_daemon = no +# builtin opt mapi_port = 50000 +# builtin opt mapi_open = false +# builtin opt mapi_autosense = false +# builtin opt sql_optimizer = default_pipe +# builtin opt sql_debug = 0 +# cmdline opt gdk_nr_threads = 0 +# cmdline opt mapi_open = true +# cmdline opt mapi_port = 31030 +# cmdline opt mapi_usock = /var/tmp/mtest-30203/.s.monetdb.31030 
+# cmdline opt monet_prompt = +# cmdline opt mal_listing = 2 +# cmdline opt gdk_dbpath = /home/mytherin/opt/var/mTests_monetdb5_extras_pyapi +# cmdline opt mal_listing = 2 +# cmdline opt embedded_r = true +# cmdline opt embedded_py = true +# cmdline opt gdk_debug = 536870922 + +# 12:27:29 > +# 12:27:29 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-30203" "--port=31030" +# 12:27:29 > + +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 +QUERY = rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"arg1[0][0]='d'\nreturn(arg1)",bstr); +ERROR = !MALException:pyapi.eval:Python exception + !This ByteArray references to a BAT in the database, you may not assign to it. +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 +QUERY = rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"arg1[0].reverse()\nreturn(arg1)",bstr); +ERROR = !MALException:pyapi.eval:Python exception + !This ByteArray references to a BAT in the database, you may not reverse it. +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 +QUERY = rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"a = arg1[0].remove('a')\nreturn(arg1)",bstr); +ERROR = !MALException:pyapi.eval:Python exception + !This ByteArray references to a BAT in the database, you may not remove anything from it. +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 +QUERY = rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"a = arg1[0].pop()\nreturn(arg1)",bstr); +ERROR = !MALException:pyapi.eval:Python exception + !This ByteArray references to a BAT in the database, you may not remove anything from it. +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 +QUERY = result:bat[:oid,:int] := pyapi.eval(nil:ptr, "arg1[0]=4\nreturn(arg1)", a); +ERROR = !MALException:pyapi.eval:Python exception + !assignment destination is read-only + +# 12:27:29 > +# 12:27:29 > "Done." 
+# 12:27:29 > + diff --git a/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.out b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.out new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/Tests/pyapi_modify_input.stable.out @@ -0,0 +1,54 @@ +stdout of test 'pyapi_modify_input` in directory 'monetdb5/extras/pyapi` itself: + + +# 12:27:28 > +# 12:27:28 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=31030" "--set" "mapi_usock=/var/tmp/mtest-30203/.s.monetdb.31030" "--set" "monet_prompt=" "--forcemito" "--set" "mal_listing=2" "--dbpath=/home/mytherin/opt/var/mTests_monetdb5_extras_pyapi" "--set" "mal_listing=2" "--set" "embedded_r=true" "--set" "embedded_py=true" +# 12:27:28 > + +# MonetDB 5 server v11.22.0 +# This is an unreleased version +# Serving database 'mTests_monetdb5_extras_pyapi', using 8 threads +# Compiled for x86_64-unknown-linux-gnu/64bit with 64bit OIDs and 128bit integers dynamically linked +# Found 7.684 GiB available main-memory. +# Copyright (c) 1993-July 2008 CWI. +# Copyright (c) August 2008-2015 MonetDB B.V., all rights reserved +# Visit http://www.monetdb.org/ for further information +# Listening for connection requests on mapi:monetdb://mytherin-N750JV:31030/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-30203/.s.monetdb.31030 +# Start processing logs sql/sql_logs version 52200 +# Start reading the write-ahead log 'sql_logs/sql/log.4' +# Finished reading the write-ahead log 'sql_logs/sql/log.4' +# Finished processing logs sql/sql_logs +# MonetDB/SQL module loaded +# MonetDB/Python module loaded +# MonetDB/R module loaded + +Ready. 
+ +# 12:27:29 > +# 12:27:29 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-30203" "--port=31030" +# 12:27:29 > + +#io.print(bstr); +#--------------------------# +# h t # name +# void str # type +#--------------------------# +[ 0@0, "asdf" ] +[ 1@0, "sd asd asd asd asd a" ] +[ 2@0, "1234" ] +[ 3@0, "test" ] +#io.print(a); +#--------------------------# +# h t # name +# void int # type +#--------------------------# +[ 0@0, 1 ] +[ 1@0, 2 ] +[ 2@0, 3 ] +[ 3@0, 4 ] + +# 12:27:29 > +# 12:27:29 > "Done." +# 12:27:29 > + diff --git a/monetdb5/extras/pyapi/Tests/pyapi_pandas.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_pandas.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_pandas.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_pandas.stable.err @@ -31,7 +31,7 @@ stderr of test 'pyapi_pandas` in directo # 19:41:50 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-7261" "--port=31934" # 19:41:50 > -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"import pandas as pd\ndf = pd.DataFrame({\'Group\': arg1, \'Values\': arg2, 'Values2': arg2})\nreturn(df)", g, c); ERROR = !MALException:pyapi.eval:An array of size 3 was returned, yet we expect a list of 2 columns. The result is invalid. 
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err @@ -31,34 +31,34 @@ stderr of test 'pyapi_returntypes` in di # 23:33:07 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-20340" "--port=36739" # 23:33:07 > -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return(\"Test\")"); ERROR = !MALException:pyapi.eval:Could not convert from type STRING to type int -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class NewClass:\n x = 5\n\nreturn(NewClass())"); ERROR = !MALException:pyapi.eval:Unsupported result object. Expected either an array, a numpy array, a numpy masked array or a pandas data frame, but received an object of type "<type 'instance'>" -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(12)"); ERROR = !MALException:pyapi.eval:A single scalar was returned, yet we expect a list of 2 columns. We can only convert a single scalar into a single column, thus the result is invalid. -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(numpy.array([12]))"); ERROR = !MALException:pyapi.eval:A single array was returned, yet we expect a list of 2 columns. The result is invalid. 
-MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return([12])"); ERROR = !MALException:pyapi.eval:A single array was returned, yet we expect a list of 2 columns. The result is invalid. -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([[33,24,55], [44,66,345]])"); ERROR = !MALException:pyapi.eval:An array of size 2 was returned, yet we expect a list of 1 columns. The result is invalid. -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = a:bat[:oid,:str] := pyapi.eval(nil:ptr,"x = unicode(\"hello\")\nreturn(x.encode(\"utf32\"))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = bb:bat[:oid,:int] := pyapi.eval(nil:ptr,"return (1"); ERROR = !MALException:pyapi.eval:Could not parse Python code ! 1. def pyfun(): !> 2. return (1 !invalid syntax (<string>, line 2) -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = cc:bat[:oid,:int] := pyapi.eval(nil:ptr,"x = 4\n x++\n\treturn (x)"); ERROR = !MALException:pyapi.eval:Could not parse Python code ! 1. def pyfun(): @@ -66,10 +66,10 @@ ERROR = !MALException:pyapi.eval:Could n !> 3. x++ ! 4. 
return (x) !unexpected indent (<string>, line 3) -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (str1:bat[:oid,:str], str2:bat[:oid,:str]) := pyapi.eval(nil:ptr,"x = unicode(\"hello\")\nreturn(numpy.array([[x.encode(\"utf32\")], [x.encode(\"utf32\")]]))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. -MAPI = (monetdb) /var/tmp/mtest-16516/.s.monetdb.30115 +MAPI = (monetdb) /var/tmp/mtest-30203/.s.monetdb.31030 QUERY = (str1:bat[:oid,:str], str2:bat[:oid,:str]) := pyapi.eval(nil:ptr,"return(numpy.array([[\"Hëllo\", \"Hello Again\"], [\"Hello Again Again\",\"That's quite enough.\"]]))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err @@ -26,14 +26,14 @@ stderr of test 'pyapi_types_huge` in dir # cmdline opt embedded_r = true # cmdline opt embedded_py = true # cmdline opt gdk_debug = 536870922 -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. 
The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. 
+!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. # 00:29:47 > # 00:29:47 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-30800" "--port=32717" diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.stable.err @@ -26,7 +26,7 @@ stderr of test 'pyapi_types_numeric` in # cmdline opt embedded_r = true # cmdline opt embedded_py = true # cmdline opt gdk_debug = 536870922 -!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. +!PERFORMANCE WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "PyLong". This means a python object is constructed for every huge integer and the entire column is copied. # 19:41:50 > # 19:41:50 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-7261" "--port=31934" diff --git a/monetdb5/extras/pyapi/bytearray.c b/monetdb5/extras/pyapi/bytearray.c new file mode 100644 --- /dev/null +++ b/monetdb5/extras/pyapi/bytearray.c @@ -0,0 +1,134 @@ + +#include "bytearray.h" + + _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list