Changeset: 3cb28e873fd1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=3cb28e873fd1
Added Files:
        sql/backends/monet5/Tests/pyloader05.sql
        sql/backends/monet5/Tests/pyloader05.stable.err
        sql/backends/monet5/Tests/pyloader05.stable.out
Modified Files:
        monetdb5/extras/pyapi/convert_loops.h
        monetdb5/extras/pyapi/emit.c
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/pyapi.h
        sql/backends/monet5/Tests/All
Branch: pythonloader
Log Message:

Fix importing string arrays and add testcase.


diffs (truncated from 446 to 300 lines):

diff --git a/monetdb5/extras/pyapi/convert_loops.h 
b/monetdb5/extras/pyapi/convert_loops.h
--- a/monetdb5/extras/pyapi/convert_loops.h
+++ b/monetdb5/extras/pyapi/convert_loops.h
@@ -176,7 +176,7 @@
             if (mask[index_offset * ret->count + iu] == TRUE)                  
                                                                       \
             {                                                                  
                                                                       \
                 bat->T->nil = 1;                                               
                                                                       \
-                BUNappend(b, str_nil, FALSE);                                  
                                                                       \
+                BUNappend(bat, str_nil, FALSE);                                
                                                                         \
             }                                                                  
                                                                       \
             else                                                               
                                                                       \
             {                                                                  
                                                                       \
@@ -215,6 +215,101 @@
     }                                                                                                    \
     bat->T->nonil = 1 - bat->T->nil;  }                                                                  \
 
+/*
+ * NP_INSERT_STRING_BAT(b): append the ret->count elements of the current
+ * result column to the string BAT b, converting each element to a string.
+ *
+ * The macro expands in place and relies on these names from the caller:
+ * ret, data, mask, index_offset, iu, msg, utf8_string and the label wrapup.
+ *
+ * - Elements flagged TRUE in mask are appended as str_nil.
+ * - Numeric types are delegated to NP_COL_BAT_STR_LOOP.
+ * - NPY_STRING / NPY_UNICODE write into the caller-supplied utf8_string.
+ * - NPY_OBJECT allocates utf8_string itself; the caller still frees it.
+ *
+ * On a conversion or allocation error msg is set and control jumps to wrapup.
+ */
+#define NP_INSERT_STRING_BAT(b) \
+    switch (ret->result_type) \
+    { \
+        case NPY_BOOL:       NP_COL_BAT_STR_LOOP(b, bit, "%hhd"); break; \
+        case NPY_BYTE:       NP_COL_BAT_STR_LOOP(b, bte, "%hhd"); break; \
+        case NPY_SHORT:      NP_COL_BAT_STR_LOOP(b, sht, "%hd"); break; \
+        case NPY_INT:        NP_COL_BAT_STR_LOOP(b, int, "%d"); break; \
+        case NPY_LONG:       NP_COL_BAT_STR_LOOP(b, long, "%ld"); break; \
+        case NPY_LONGLONG:   NP_COL_BAT_STR_LOOP(b, lng, LLFMT); break; \
+        case NPY_UBYTE:      NP_COL_BAT_STR_LOOP(b, unsigned char, "%hhu"); break; \
+        case NPY_USHORT:     NP_COL_BAT_STR_LOOP(b, unsigned short, "%hu"); break; \
+        case NPY_UINT:       NP_COL_BAT_STR_LOOP(b, unsigned int, "%u"); break; \
+        case NPY_ULONG:      NP_COL_BAT_STR_LOOP(b, unsigned long, "%lu"); break; \
+        case NPY_ULONGLONG:  NP_COL_BAT_STR_LOOP(b, unsigned long long, ULLFMT); break; \
+        case NPY_FLOAT16: \
+        case NPY_FLOAT:      NP_COL_BAT_STR_LOOP(b, flt, "%f"); break; \
+        case NPY_DOUBLE: \
+        case NPY_LONGDOUBLE: NP_COL_BAT_STR_LOOP(b, dbl, "%lf"); break; \
+        case NPY_STRING: \
+            for (iu = 0; iu < ret->count; iu++) { \
+                if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \
+                    (b)->T->nil = 1; \
+                    BUNappend(b, str_nil, FALSE); \
+                } else { \
+                    if (!string_copy(&data[(index_offset * ret->count + iu) * ret->memory_size], utf8_string, ret->memory_size, true)) { \
+                        msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n"); \
+                        goto wrapup; \
+                    } \
+                    BUNappend(b, utf8_string, FALSE); \
+                } \
+            } \
+            break; \
+        case NPY_UNICODE: \
+            for (iu = 0; iu < ret->count; iu++) { \
+                if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \
+                    (b)->T->nil = 1; \
+                    BUNappend(b, str_nil, FALSE); \
+                } else { \
+                    utf32_to_utf8(0, ret->memory_size / 4, utf8_string, (const Py_UNICODE*)(&data[(index_offset * ret->count + iu) * ret->memory_size])); \
+                    BUNappend(b, utf8_string, FALSE); \
+                } \
+            } \
+            break; \
+        case NPY_OBJECT: \
+        { \
+            /* The resulting array is an array of pointers to various python objects */ \
+            /* Because the python objects can be of any size, we need to allocate a different size utf8_string for every object */ \
+            /* we will first loop over all the objects to get the maximum size needed, so we only need to do one allocation */ \
+            size_t utf8_size = utf8string_minlength; \
+            for (iu = 0; iu < ret->count; iu++) { \
+                size_t size = utf8string_minlength; \
+                PyObject *obj; \
+                if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) continue; \
+                obj = *((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]); \
+                size = pyobject_get_size(obj); \
+                if (size > utf8_size) utf8_size = size; \
+            } \
+            utf8_string = GDKzalloc(utf8_size); \
+            if (utf8_string == NULL) { \
+                msg = createException(MAL, "pyapi.eval", "Could not allocate memory for string conversion.\n"); \
+                goto wrapup; \
+            } \
+            for (iu = 0; iu < ret->count; iu++) { \
+                if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) { \
+                    (b)->T->nil = 1; \
+                    BUNappend(b, str_nil, FALSE); \
+                } else { \
+                    /* we try to handle as many types as possible */ \
+                    pyobject_to_str(((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]), utf8_size, &utf8_string); \
+                    BUNappend(b, utf8_string, FALSE); \
+                } \
+            } \
+            break; \
+        } \
+        default: \
+            msg = createException(MAL, "pyapi.eval", "Unrecognized type. Could not convert to NPY_UNICODE.\n"); \
+            goto wrapup; \
+    } \
+    (b)->T->nonil = 1 - (b)->T->nil;
+
+
 #ifdef HAVE_HGE
 #define NOT_HGE(mtpe) TYPE_##mtpe != TYPE_hge
 #else
diff --git a/monetdb5/extras/pyapi/emit.c b/monetdb5/extras/pyapi/emit.c
--- a/monetdb5/extras/pyapi/emit.c
+++ b/monetdb5/extras/pyapi/emit.c
@@ -4,6 +4,7 @@
 #include "interprocess.h"
 
 #include "convert_loops.h"
+#include "unicode.h"
 
 #if PY_MAJOR_VERSION >= 3
 #define IS_PY3K
@@ -202,6 +203,25 @@ loop_end:
                         break;
                 #endif
                     case TYPE_str:
+                    {
+                        char *utf8_string = NULL;
+                        /* NPY_OBJECT results get their buffer allocated inside
+                         * NP_INSERT_STRING_BAT; all other types need one here. */
+                        if (ret->result_type != NPY_OBJECT) {
+                            utf8_string = GDKzalloc(utf8string_minlength + ret->memory_size + 1);
+                            if (utf8_string == NULL) {
+                                /* do not touch the buffer if the allocation failed */
+                                PyErr_NoMemory();
+                                return NULL;
+                            }
+                            utf8_string[utf8string_minlength + ret->memory_size] = '\0';
+                        }
+                        /* NOTE(review): the macro can `goto wrapup` on error, which
+                         * skips the GDKfree below -- confirm that path does not leak. */
+                        NP_INSERT_STRING_BAT(self->cols[i].b);
+                        if (utf8_string) GDKfree(utf8_string);
+                    }
+                        break;
                     default:
                         PyErr_Format(PyExc_TypeError, "Unsupported BAT Type %s", BatType_Format(self->cols[i].b->T->type));
                         return NULL;
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -51,8 +51,6 @@ bool option_warning;
 static PyObject *marshal_module = NULL;
 PyObject *marshal_loads = NULL;
 
-const int utf8string_minlength = 256;
-
 int PyAPIEnabled(void) {
     return (GDKgetenv_istrue(pyapi_enableflag)
             || GDKgetenv_isyes(pyapi_enableflag));
@@ -2212,82 +2210,8 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret
             BATseqbase(b, seqbase); b->T->nil = 0; b->T->nonil = 1;
             b->tkey = 0; b->tsorted = 0; b->trevsorted = 0;
             VERBOSE_MESSAGE("- Collecting return values of type %s.\n", 
PyType_Format(ret->result_type));
-            switch(ret->result_type)
-            {
-                case NPY_BOOL:      NP_COL_BAT_STR_LOOP(b, bit, "%hhd"); break;
-                case NPY_BYTE:      NP_COL_BAT_STR_LOOP(b, bte, "%hhd"); break;
-                case NPY_SHORT:     NP_COL_BAT_STR_LOOP(b, sht, "%hd"); break;
-                case NPY_INT:       NP_COL_BAT_STR_LOOP(b, int, "%d"); break;
-                case NPY_LONG:      NP_COL_BAT_STR_LOOP(b, long, "%ld"); break;
-                case NPY_LONGLONG:  NP_COL_BAT_STR_LOOP(b, lng, LLFMT); break;
-                case NPY_UBYTE:     NP_COL_BAT_STR_LOOP(b, unsigned char, 
"%hhu"); break;
-                case NPY_USHORT:    NP_COL_BAT_STR_LOOP(b, unsigned short, 
"%hu"); break;
-                case NPY_UINT:      NP_COL_BAT_STR_LOOP(b, unsigned int, 
"%u"); break;
-                case NPY_ULONG:     NP_COL_BAT_STR_LOOP(b, unsigned long, 
"%lu"); break;
-                case NPY_ULONGLONG: NP_COL_BAT_STR_LOOP(b, unsigned long long, 
ULLFMT); break;
-                case NPY_FLOAT16:
-                case NPY_FLOAT:     NP_COL_BAT_STR_LOOP(b, flt, "%f"); break;
-                case NPY_DOUBLE:
-                case NPY_LONGDOUBLE: NP_COL_BAT_STR_LOOP(b, dbl, "%lf"); break;
-                case NPY_STRING:
-                    for (iu = 0; iu < ret->count; iu++) {
-                        if (mask != NULL && (mask[index_offset * ret->count + 
iu]) == TRUE) {
-                            b->T->nil = 1;
-                            BUNappend(b, str_nil, FALSE);
-                        }  else {
-                            if (!string_copy(&data[(index_offset * ret->count 
+ iu) * ret->memory_size], utf8_string, ret->memory_size, true)) {
-                                msg = createException(MAL, "pyapi.eval", 
"Invalid string encoding used. Please return a regular ASCII string, or a 
Numpy_Unicode object.\n");
-                                goto wrapup;
-                            }
-                            BUNappend(b, utf8_string, FALSE);
-                        }
-                    }
-                    break;
-                case NPY_UNICODE:
-                    for (iu = 0; iu < ret->count; iu++) {
-                        if (mask != NULL && (mask[index_offset * ret->count + 
iu]) == TRUE) {
-                            b->T->nil = 1;
-                            BUNappend(b, str_nil, FALSE);
-                        }  else {
-                            utf32_to_utf8(0, ret->memory_size / 4, 
utf8_string, (const Py_UNICODE*)(&data[(index_offset * ret->count + iu) * 
ret->memory_size]));
-                            BUNappend(b, utf8_string, FALSE);
-                        }
-                    }
-                    break;
-                case NPY_OBJECT:
-                {
-                    //The resulting array is an array of pointers to various 
python objects
-                    //Because the python objects can be of any size, we need 
to allocate a different size utf8_string for every object
-                    //we will first loop over all the objects to get the 
maximum size needed, so we only need to do one allocation
-                    size_t utf8_size = utf8string_minlength;
-                    for (iu = 0; iu < ret->count; iu++) {
-                        size_t size = utf8string_minlength;
-                        PyObject *obj;
-                        if (mask != NULL && (mask[index_offset * ret->count + 
iu]) == TRUE) continue;
-                        obj = *((PyObject**) &data[(index_offset * ret->count 
+ iu) * ret->memory_size]);
-                        size = pyobject_get_size(obj);
-                        if (size > utf8_size) utf8_size = size;
-                    }
-                    utf8_string = GDKzalloc(utf8_size);
-                    for (iu = 0; iu < ret->count; iu++) {
-                        if (mask != NULL && (mask[index_offset * ret->count + 
iu]) == TRUE) {
-                            b->T->nil = 1;
-                            BUNappend(b, str_nil, FALSE);
-                        } else {
-                            //we try to handle as many types as possible
-                            pyobject_to_str(((PyObject**) &data[(index_offset 
* ret->count + iu) * ret->memory_size]), utf8_size, &utf8_string);
-                            BUNappend(b, utf8_string, FALSE);
-                        }
-                    }
-                    break;
-                }
-                default:
-                    msg = createException(MAL, "pyapi.eval", "Unrecognized 
type. Could not convert to NPY_UNICODE.\n");
-                    goto wrapup;
-            }
-            GDKfree(utf8_string);
-
-            b->T->nonil = 1 - b->T->nil;
+            NP_INSERT_STRING_BAT(b);
+            if (utf8_string) GDKfree(utf8_string);
             BATsetcount(b, (BUN) ret->count);
             BATsettrivprop(b);
             break;
diff --git a/monetdb5/extras/pyapi/pyapi.h b/monetdb5/extras/pyapi/pyapi.h
--- a/monetdb5/extras/pyapi/pyapi.h
+++ b/monetdb5/extras/pyapi/pyapi.h
@@ -120,6 +120,6 @@ str _loader_init(void);
 pyapi_export char *PyError_CreateException(char *error_text, char *pycall);
 
 #define pyapi_enableflag "embedded_py"
-
+#define utf8string_minlength 256
 
 #endif /* _PYPI_LIB_ */
diff --git a/sql/backends/monet5/Tests/All b/sql/backends/monet5/Tests/All
--- a/sql/backends/monet5/Tests/All
+++ b/sql/backends/monet5/Tests/All
@@ -55,6 +55,7 @@ HAVE_LIBPY?pyloader01
 HAVE_LIBPY?pyloader02
 HAVE_LIBPY?pyloader03
 HAVE_LIBPY?pyloader04
+HAVE_LIBPY?pyloader05
 
 
 # should this work?
diff --git a/sql/backends/monet5/Tests/pyloader05.sql 
b/sql/backends/monet5/Tests/pyloader05.sql
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/Tests/pyloader05.sql
@@ -0,0 +1,26 @@
+
+# Test that a Python loader can emit scalars, lists and NumPy arrays of mixed types into a STRING column
+START TRANSACTION;
+CREATE TABLE pyloader05table(s STRING);
+CREATE LOADER pyloader05() LANGUAGE PYTHON {
+    _emit.emit({'s': 33});
+    _emit.emit({'s': 42.0});
+    _emit.emit({'s': 'hello'});
+    _emit.emit({'s': u'\u00D6'}); # \u00D6 = O + umlaut
+    _emit.emit({'s': [33, 'hello']});
+    _emit.emit({'s': [42.0, 33]});
+    _emit.emit({'s': numpy.array(['hello', 'hello', 'hello'])});
+    _emit.emit({'s': [u'\u00D6', 'hello', 33]});
+    _emit.emit({'s': numpy.array([u'\u00D6', 'hello', 33])});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.float32)});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.float64)});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.int8)});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.int16)});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.int32)});
+    _emit.emit({'s': numpy.arange(3).astype(numpy.int64)});
+};
+COPY INTO pyloader05table FROM LOADER pyloader05();
+SELECT * FROM pyloader05table;
+DROP TABLE pyloader05table;
+DROP LOADER pyloader05;
+ROLLBACK;
diff --git a/sql/backends/monet5/Tests/pyloader05.stable.err 
b/sql/backends/monet5/Tests/pyloader05.stable.err
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/Tests/pyloader05.stable.err
@@ -0,0 +1,36 @@
+stderr of test 'pyloader05` in directory 'sql/backends/monet5` itself:
+
+
+# 13:44:57 >  
+# 13:44:57 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" 
"mapi_open=true" "--set" "mapi_port=34686" "--set" 
"mapi_usock=/var/tmp/mtest-17327/.s.monetdb.34686" "--set" "monet_prompt=" 
"--forcemito" 
"--dbpath=/home/mytherin/opt/var/MonetDB/mTests_sql_backends_monet5" "--set" 
"embedded_r=yes" "--set" "embedded_py=true"
+# 13:44:57 >  
+
+# builtin opt  gdk_dbpath = /home/mytherin/opt/var/monetdb5/dbfarm/demo
+# builtin opt  gdk_debug = 0
+# builtin opt  gdk_vmtrim = no
+# builtin opt  monet_prompt = >
+# builtin opt  monet_daemon = no
+# builtin opt  mapi_port = 50000
+# builtin opt  mapi_open = false
+# builtin opt  mapi_autosense = false
+# builtin opt  sql_optimizer = default_pipe
+# builtin opt  sql_debug = 0
+# cmdline opt  gdk_nr_threads = 0
+# cmdline opt  mapi_open = true
+# cmdline opt  mapi_port = 34686
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to