Changeset: be2c60d4445d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=be2c60d4445d Modified Files: monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out monetdb5/extras/pyapi/pyapi.c monetdb5/extras/pyapi/type_conversion.c monetdb5/extras/pyapi/type_conversion.h Branch: pyapi Log Message:
Strings can now be stored in either a NPY_STRING (big char array) or NPY_OBJECT (pointers) array, depending on the numpy_string_array flag. diffs (truncated from 745 to 300 lines): diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC --- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.malC @@ -23,6 +23,18 @@ io.print(rhge, shge); (rhge:bat[:oid,:hge], shge:bat[:oid,:hge]) := pyapi.eval(nil:ptr,"return(numpy.array([[3200.3,12.7],[44.1,22.8]]))",bhge); io.print(rhge, shge); +# convert hge to string +rhge:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bhge); +io.print(rhge); + +# convert string to hge +bstr:= bat.new(:oid,:str); +bat.append(bstr,"412412":str); +bat.append(bstr,"13231414":str); +bat.append(bstr,"895233278923448975389573895731":str); +rhge:bat[:oid,:hge] := pyapi.eval(nil:ptr,"return(arg1)", bstr); +io.print(rhge); + # return multidimensional huge (rhge:bat[:oid,:hge], shge:bat[:oid,:hge]) := pyapi.eval(nil:ptr,"return(numpy.ma.masked_array([arg1, arg1], [arg1.mask,arg1.mask]))",bhge); @@ -54,3 +66,4 @@ io.print(rint); # convert huge to double rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return(arg1)",z); io.print(rdbl); + diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err --- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.err @@ -33,6 +33,7 @@ stderr of test 'pyapi_types_huge` in dir !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. !WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. +!WARNING: Type "hge" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type "long". This is likely very slow. # 00:29:47 > # 00:29:47 > "mclient" "-lmal" "-ftest" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-30800" "--port=32717" diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out --- a/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out +++ b/monetdb5/extras/pyapi/Tests/pyapi_types_huge.stable.out @@ -60,6 +60,24 @@ Ready. #--------------------------# [ 0@0, 3200, 44 ] [ 1@0, 12, 22 ] +#io.print(rhge); +#--------------------------# +# h t # name +# void str # type +#--------------------------# +[ 0@0, "18044433428933534654634643698858345" ] +[ 1@0, "895233278923448975389573895731" ] +[ 2@0, "558372892789247104910348981249" ] +[ 3@0, "-23" ] +[ 4@0, nil ] +#io.print(rhge); +#--------------------------# +# h t # name +# void hge # type +#--------------------------# +[ 0@0, 412412 ] +[ 1@0, 13231414 ] +[ 2@0, 895233278923448975389573895731 ] #io.print(rhge, shge); #--------------------------# # h t t # name diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c --- a/monetdb5/extras/pyapi/pyapi.c +++ b/monetdb5/extras/pyapi/pyapi.c @@ -119,7 +119,6 @@ int PyAPIEnabled(void) { char* FormatCode(char* code, char **args, size_t argcount, size_t tabwidth); -// TODO: exclude pyapi from mergetable, too static MT_Lock pyapiLock; static MT_Lock pyapiSluice; static int pyapiInitialized = FALSE; @@ -287,7 +286,7 @@ static int pyapiInitialized = FALSE; else \ { \ bat = BATnew(TYPE_void, TYPE_##mtpe, ret->count, TRANSIENT); \ - BATseqbase(bat, seqbase); bat->T->nil = 0; bat->T->nonil = 1; \ + BATseqbase(bat, seqbase); bat->T->nil = 0; bat->T->nonil = 1; \ bat->tkey = 0; bat->tsorted = 0; bat->trevsorted = 0; \ switch(ret->result_type) \ { \ @@ -308,6 +307,7 @@ static int pyapiInitialized = FALSE; case NPY_LONGDOUBLE: NP_COL_BAT_LOOP(bat, mtpe, dbl); break; \ case NPY_STRING: NP_COL_BAT_LOOP_FUNC(bat, mtpe, str_to_##mtpe); break; \ case NPY_UNICODE: NP_COL_BAT_LOOP_FUNC(bat, mtpe, unicode_to_##mtpe); break; \ + case NPY_OBJECT: NP_COL_BAT_LOOP_FUNC(bat, mtpe, pyobject_to_##mtpe); break; \ default: \ msg = createException(MAL, "pyapi.eval", "Unrecognized type. Could not convert to %s.\n", BatType_Format(TYPE_##mtpe)); \ goto wrapup; \ @@ -374,7 +374,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st PyInput *pyinput_values = NULL; int seqbase = 0; - bool numpy_string_array = true; + bool numpy_string_array = false; bool option_verbose = GDKgetenv_isyes(verbose_enableflag) || GDKgetenv_istrue(verbose_enableflag); bool option_debug = GDKgetenv_isyes(debug_enableflag) || GDKgetenv_istrue(debug_enableflag); bool option_zerocopy = !(GDKgetenv_isyes(zerocopy_disableflag) || GDKgetenv_istrue(zerocopy_disableflag)); @@ -736,13 +736,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st break; #ifdef HAVE_HGE case TYPE_hge: - { - char hex[40]; - const hge *t = (const hge *) inp->dataptr; - hge_to_string(hex, 40, *t); - //then we create a PyLong from that string by parsing it - vararray = PyLong_FromString(hex, NULL, 16); - } + vararray = PyLong_FromHge(*((hge *) inp->dataptr)); break; #endif case TYPE_str: @@ -911,45 +905,57 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } else { - // TODO: This - // NPY_OBJECT array - // vararray = PyArray_New( - // &PyArray_Type, - // 1, - // (npy_intp[1]) {count}, - // NPY_OBJECT, - // NULL, - // NULL, - // 0, - // 0, - // NULL); - // j = 0; - // BATloop(b, p, q) - // { - // if (j >= t_start) { - // char *t = (char *) BUNtail(li, p); - // PyObject *obj; - // if (strcmp(t, str_nil) == 0) { - // //str_nil isn't a valid UTF-8 character (it's 0x80), so we can't decode it as UTF-8 (it will throw an error) - // obj = PyString_FromString("-"); - // } - // else { - // //otherwise we can just decode the string as UTF-8 - // obj = PyString_FromString(t); - // } + bool ascii; + li = bat_iterator(b); + count = inp->count; + //create a NPY_OBJECT array object + vararray = PyArray_New( + &PyArray_Type, + 1, + (npy_intp[1]) {t_end - t_start}, + NPY_OBJECT, + NULL, + NULL, + 0, + 0, + NULL); + j = 0; + BATloop(b, p, q) + { + if (j >= t_start) { + char *t = (char *) BUNtail(li, p); + PyObject *obj; + utf8_strlen(t, &ascii); + if (!ascii) { + if (strcmp(t, str_nil) == 0) { + //str_nil isn't a valid UTF-8 character (it's 0x80), so we can't decode it as UTF-8 (it will throw an error) + obj = PyUnicode_FromString("-"); + } + else { + //otherwise we can just decode the string as UTF-8 + obj = PyUnicode_FromString(t); + } + } else { + if (strcmp(t, str_nil) == 0) { + //str_nil isn't a valid UTF-8 character (it's 0x80), so we can't decode it as UTF-8 (it will throw an error) + obj = PyString_FromString("-"); + } + else { + //otherwise we can just decode the string as UTF-8 + obj = PyString_FromString(t); + } + } - // if (obj == NULL) - // { - // PyErr_Print(); - // msg = createException(MAL, "pyapi.eval", "Failed to decode string as UTF-8."); - // goto wrapup; - // } - // PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), obj); - // } - // if (j == t_end) break; - // j++; - // } - // PyArray_INCREF((PyArrayObject*)vararray); + if (obj == NULL) + { + msg = createException(MAL, "pyapi.eval", "Failed to create string."); + goto wrapup; + } + PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), obj); + } + if (j == t_end) break; + j++; + } } break; #ifdef HAVE_HGE @@ -966,24 +972,16 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st NPY_OBJECT, NULL, NULL, - 128, //128 bits per value - 0, + 0, + 0, NULL); j = 0; fprintf(stderr, "!WARNING: Type \"hge\" (128 bit) is unsupported by Numpy. The numbers are instead converted to python objects of type \"long\". This is likely very slow.\n"); BATloop(b, p, q) { - char hex[40]; PyObject *obj; const hge *t = (const hge *) BUNtail(li, p); - hge_to_string(hex, 40, *t); - //then we create a PyLong from that string by parsing it - obj = PyLong_FromString(hex, NULL, 16); - if (obj == NULL) { - PyErr_Print(); - msg = createException(MAL, "pyapi.eval", "Failed to convert huge array."); - goto wrapup; - } + obj = PyLong_FromHge(*t); PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), obj); j++; } @@ -1171,37 +1169,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st } } } - if (!PyList_Check(pResult)) { - //check if the result is a multi-dimensional numpy array of type NPY_OBJECT - //if the result object is a multi-dimensional numpy array of type NPY_OBJECT, we convert it to NPY_STRING because we don't know how to handle NPY_OBJECT arrays otherwise (they could contain literally anything) - if (PyType_IsNumpyMaskedArray(pResult)) { - PyObject *data, *mask; - data = PyObject_GetAttrString(pResult, "data"); - if (PyArray_NDIM((PyArrayObject*)data) != 1 && PyArray_DESCR((PyArrayObject*)data)->type_num == NPY_OBJECT) { - //if it's a masked array we have to copy the mask along with converting the data to NPY_STRING - PyObject *mafunc, *maargs; - PyObject *tp = PyArray_FromAny(pResult, PyArray_DescrFromType(NPY_STRING), 0, 0, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); - mask = PyObject_GetAttrString(pResult, "mask"); - - mafunc = PyObject_GetAttrString(PyImport_Import(PyString_FromString("numpy.ma")), "masked_array"); - maargs = PyTuple_New(2); - PyTuple_SetItem(maargs, 0, tp); - PyTuple_SetItem(maargs, 1, mask); - mask = PyObject_CallObject(mafunc, maargs); - Py_DECREF(pResult); - Py_DECREF(mafunc); - pResult = mask; - } - } - else { - if (PyArray_NDIM((PyArrayObject*)pResult) != 1 && PyArray_DESCR((PyArrayObject*)pResult)->type_num == NPY_OBJECT) { - //if it's not a masked array we just convert the data to NPY_STRING - PyObject *tp = PyArray_FromAny(pResult, PyArray_DescrFromType(NPY_STRING), 0, 0, NPY_ARRAY_CARRAY | NPY_ARRAY_FORCECAST, NULL); - Py_DECREF(pResult); - pResult = tp; - } - } - } PyRun_SimpleString("del pyfun"); } else { @@ -1230,7 +1197,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st int bat_type = ATOMstorage(getColumnType(getArgType(mb,pci,i))); ret->multidimensional = FALSE; - // There are three possibilities (we have ensured this right after executing the Python call) // 1: The top level result object is a PyList or Numpy Array containing pci->retc Numpy Arrays // 2: The top level result object is a (pci->retc x N) dimensional Numpy Array [Multidimensional] @@ -1258,7 +1224,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st pColO = PyArray_GETITEM((PyArrayObject*)data, PyArray_GETPTR1((PyArrayObject*)data, i)); } } - // Now we have to do some preprocessing on the data if (ret->multidimensional) { // If it is a multidimensional Numpy array, we don't need to do any conversion, we can just do some pointers @@ -1272,7 +1237,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st else { // If it isn't we need to convert pColO to the expected Numpy Array type ret->numpy_array = NULL; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list