Changeset: f959f735a880 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f959f735a880
Modified Files:
	monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC
	monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err
	monetdb5/extras/pyapi/pyapi.c
	sql/backends/monet5/Tests/pyapi08.sql
	sql/backends/monet5/Tests/pyapi10.sql
	sql/backends/monet5/Tests/pyapi10.stable.err
Branch: pyapi
Log Message:
Reworked python code indenter. diffs (truncated from 865 to 300 lines): diff --git a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC --- a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC @@ -163,7 +163,7 @@ r:bat[:oid,:int] := pyapi.eval(nil:ptr," io.print(r); # return an unsupported object -(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class NewClass:\n\tx = 5\n\n\treturn(NewClass())"); +(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class NewClass:\n x = 5\n\nreturn(NewClass())"); # return a scalar when multiple returns are expected (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(12)"); diff --git a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.stable.err @@ -31,48 +31,48 @@ stderr of test 'pyapi_returntypes` in di # 23:33:07 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-20340" "--port=36739" # 23:33:07 > -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return(\"Test\")"); ERROR = !MALException:pyapi.eval:Could not convert from type STRING to type int -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 -QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class NewClass:\n\tx = 5\n\n\treturn(NewClass())"); +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 +QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class NewClass:\n x = 5\n\nreturn(NewClass())"); ERROR = !MALException:pyapi.eval:Unsupported result object. 
Expected either an array, a numpy array, a numpy masked array or a pandas data frame, but received an object of type "<type 'instance'>" -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(12)"); ERROR = !MALException:pyapi.eval:A single scalar was returned, yet we expect a list of 2 columns. We can only convert a single scalar into a single column, thus the result is invalid. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"import pandas as pd\ndf = pd.DataFrame({\'Group\': arg1, \'Values\': arg2, 'Values2': arg2})\nreturn(df)", g, b); ERROR = !MALException:pyapi.eval:An array of size 3 was returned, yet we expect a list of 2 columns. The result is invalid. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(numpy.array([12]))"); ERROR = !MALException:pyapi.eval:A single array was returned, yet we expect a list of 2 columns. The result is invalid. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return([12])"); ERROR = !MALException:pyapi.eval:A single array was returned, yet we expect a list of 2 columns. The result is invalid. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([[33,24,55], [44,66,345]])"); ERROR = !MALException:pyapi.eval:An array of size 2 was returned, yet we expect a list of 1 columns. The result is invalid. 
-MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = a:bat[:oid,:str] := pyapi.eval(nil:ptr,"x = unicode(\"hello\")\nreturn(x.encode(\"utf32\"))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = bb:bat[:oid,:int] := pyapi.eval(nil:ptr,"return (1"); ERROR = !MALException:pyapi.eval:Could not parse Python code - !def pyfun(): - ! return (1 + ! 1. def pyfun(): + !> 2. return (1 !invalid syntax (<string>, line 2) -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = cc:bat[:oid,:int] := pyapi.eval(nil:ptr,"x = 4\n x++\n\treturn (x)"); ERROR = !MALException:pyapi.eval:Could not parse Python code - !def pyfun(): - ! x = 4 - ! x++ - ! return (x) + ! 1. def pyfun(): + ! 2. x = 4 + !> 3. x++ + ! 4. return (x) !unexpected indent (<string>, line 3) -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (str1:bat[:oid,:str], str2:bat[:oid,:str]) := pyapi.eval(nil:ptr,"x = unicode(\"hello\")\nreturn(numpy.array([[x.encode(\"utf32\")], [x.encode(\"utf32\")]]))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. -MAPI = (monetdb) /var/tmp/mtest-25907/.s.monetdb.33825 +MAPI = (monetdb) /var/tmp/mtest-28082/.s.monetdb.37574 QUERY = (str1:bat[:oid,:str], str2:bat[:oid,:str]) := pyapi.eval(nil:ptr,"return(numpy.array([[\"Hëllo\", \"Hello Again\"], [\"Hello Again Again\",\"That's quite enough.\"]]))"); ERROR = !MALException:pyapi.eval:Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object. 
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -71,6 +71,15 @@ const char* debug_enableflag = "enable_p GDKfree(var); \ } +const char * pyarg_tabwidth[] = {"TABWIDTH", "MULTIPROCESSING"}; + +struct _ParseArguments +{ + int tab_width; + bool multiprocessing; +}; +#define ParseArguments struct _ParseArguments + struct _ReturnBatDescr { int npy_type; //npy type @@ -109,6 +118,8 @@ int PyAPIEnabled(void) { } +char* FormatCode(char* code, char **args, size_t argcount, size_t tabwidth); + // TODO: exclude pyapi from mergetable, too static MT_Lock pyapiLock; static MT_Lock pyapiSluice; @@ -255,7 +266,7 @@ static int pyapiInitialized = FALSE; goto wrapup; \ } \ data = (char*) ret->array_data; \ - if (zero_copy && ret->count > 0 && TYPE_##mtpe == PyType_ToBat(ret->result_type) && (ret->count * ret->memory_size < BUN_MAX) && \ + if (option_zerocopy && ret->count > 0 && TYPE_##mtpe == PyType_ToBat(ret->result_type) && (ret->count * ret->memory_size < BUN_MAX) && \ (ret->numpy_array == NULL || PyArray_FLAGS(ret->numpy_array) & NPY_ARRAY_OWNDATA)) \ { \ /*We can only create a direct map if the numpy array type and target BAT type*/ \ @@ -344,19 +355,14 @@ typedef enum { } pyapi_scan_state; bool PyType_IsPyScalar(PyObject *object); -char *PyError_CreateException(char *error_text, char *error_text_2); +char *PyError_CreateException(char *error_text, char *pycall); str PyAPIeval(MalBlkPtr mb, MalStkPtr stk, InstrPtr pci, bit grouped, bit mapped) { sql_func * sqlfun = *(sql_func**) getArgReference(stk, pci, pci->retc); str exprStr = *getArgReference_str(stk, pci, pci->retc + 1); int i = 1, ai = 0; - char argbuf[64]; - char argnames[1000] = ""; - size_t pos; char* pycall = NULL; - char *expr_ind = NULL; - size_t pycalllen, expr_ind_len; str *args; char *msg = MAL_SUCCEED; BAT *b = NULL; @@ -368,7 +374,11 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st PyReturn *pyreturn_values = NULL; 
PyInput *pyinput_values = NULL; int seqbase = 0; - bool zero_copy = !(GDKgetenv_isyes(zerocopy_disableflag) || GDKgetenv_istrue(zerocopy_disableflag)); + + bool option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag); + bool option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag); + bool option_zerocopy = !(GDKgetenv_isyes(zerocopy_disableflag) || GDKgetenv_istrue(zerocopy_disableflag)); + (void) option_verbose; (void) option_debug; #ifndef WIN32 bool single_fork = mapped == 1; int shm_id = -1; @@ -383,32 +393,38 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st int j; size_t iu; - if (!PyAPIEnabled()) - { + if (!PyAPIEnabled()) { throw(MAL, "pyapi.eval", "Embedded Python has not been enabled. Start server with --set %s=true", pyapi_enableflag); } VERBOSE_MESSAGE("PyAPI Start\n"); - pycalllen = strlen(exprStr) + sizeof(argnames) + 1000; - expr_ind_len = strlen(exprStr) + 1000; - pycall = GDKzalloc(pycalllen); - expr_ind = GDKzalloc(expr_ind_len); + args = (str*) GDKzalloc(pci->argc * sizeof(str)); + if (args == NULL) { + throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + } pyreturn_values = GDKzalloc(pci->retc * sizeof(PyReturn)); - pyinput_values = GDKzalloc((pci->argc - (pci->retc + 2)) * sizeof(PyInput)); - if (args == NULL || pycall == NULL || pyreturn_values == NULL) + if (pyreturn_values == NULL) { + GDKfree(args); + throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + } + + if ((pci->argc - (pci->retc + 2)) * sizeof(PyInput) > 0) { - throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); - // TODO: free args and rcall + pyinput_values = GDKzalloc((pci->argc - (pci->retc + 2)) * sizeof(PyInput)); + + if (pyinput_values == NULL) { + GDKfree(args); GDKfree(pyreturn_values); + throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL); + } } // first argument after the return contains the pointer to the sql_func structure - if (sqlfun != NULL && sqlfun->ops->cnt > 0) - { + if (sqlfun != NULL && sqlfun->ops->cnt > 0) { int cargs = pci->retc + 
2; argnode = sqlfun->ops->h; while (argnode) { @@ -420,15 +436,13 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } // the first unknown argument is the group, we don't really care for the rest. - for (i = pci->retc + 2; i < pci->argc; i++) - { - if (args[i] == NULL) - { - if (!seengrp && grouped) - { + for (i = pci->retc + 2; i < pci->argc; i++) { + if (args[i] == NULL) { + if (!seengrp && grouped) { args[i] = GDKstrdup("aggr_group"); seengrp = TRUE; } else { + char argbuf[64]; snprintf(argbuf, sizeof(argbuf), "arg%i", i - pci->retc - 1); args[i] = GDKstrdup(argbuf); } @@ -438,101 +452,9 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st VERBOSE_MESSAGE("Formatting python code.\n"); - // create argument list - pos = 0; - for (i = pci->retc + 2; i < pci->argc && pos < sizeof(argnames); i++) - { - pos += snprintf(argnames + pos, sizeof(argnames) - pos, "%s%s", args[i], i < pci->argc - 1 ? ", " : ""); - } - if (pos >= sizeof(argnames)) - { - msg = createException(MAL, "pyapi.eval", "Command too large"); - goto wrapup; - } - - { - // indent every line in the expression by one level, - // if we find newline-tab, use tab, space otherwise - // two passes, first inserts null placeholder, second replaces - // need to be careful, newline might be in a quoted string - // this does not handle multi-line strings starting with """ (yet?) - pyapi_scan_state state = SEENNL; - char indentchar = 0; - size_t py_pos, py_ind_pos = 0; - - if (strlen(exprStr) > 0 && exprStr[0] == '{') - exprStr[0] = ' '; - if (strlen(exprStr) > 2 && exprStr[strlen(exprStr) - 2] == '}') - exprStr[strlen(exprStr) - 2] = ' '; - - for (py_pos = 0; py_pos < strlen(exprStr); py_pos++) - { - if (exprStr[py_pos] == ';') - exprStr[py_pos] = ' '; - } - - for (py_pos = 0; py_pos < strlen(exprStr); py_pos++) { - // +1 because we need space for the \0 we append below. 
- if (py_ind_pos + 1 > expr_ind_len) { - msg = createException(MAL, "pyapi.eval", "Overflow in re-indentation"); - goto wrapup; - } - switch(state) { - case NORMAL: - if (exprStr[py_pos] == '\'' || exprStr[py_pos] == '"') - { - state = INQUOTES; - } - if (exprStr[py_pos] == '\n') - { - state = SEENNL; - } - break; - - case INQUOTES: - if (exprStr[py_pos] == '\\') - { - state = ESCAPED; - } - if (exprStr[py_pos] == '\'' || exprStr[py_pos] == '"') - { - state = NORMAL; - } - break; - - case ESCAPED: - state = INQUOTES; - break; - - case SEENNL: - if (exprStr[py_pos] == ' ' || exprStr[py_pos] == '\t') - { - indentchar = exprStr[py_pos]; - } _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list