Changeset: 04bbdfcc8eb7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=04bbdfcc8eb7
Modified Files:
        monetdb5/extras/pyapi/bytearray.c
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/type_conversion.c
        monetdb5/extras/pyapi/unicode.c
Branch: pyapi
Log Message:

Fix memory leak when using NPY_OBJECT strings.


diffs (233 lines):

diff --git a/monetdb5/extras/pyapi/bytearray.c b/monetdb5/extras/pyapi/bytearray.c
--- a/monetdb5/extras/pyapi/bytearray.c
+++ b/monetdb5/extras/pyapi/bytearray.c
@@ -111,6 +111,20 @@ int replace_method(char *name, PyCFuncti
        return index;
 }
 
+static void
+bytearray_dealloc(PyByteArrayObject *self)
+{
+    if (self->ob_alloc != 0 && self->ob_exports > 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "deallocated bytearray object has exported buffers");
+        PyErr_Print();
+    }
+    if (self->ob_bytes != 0 && self->ob_alloc != 0) {
+        PyMem_Free(self->ob_bytes);
+    }
+    Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
 void PyByteArray_Override(void)
 {
        // We override all the Python ByteArray methods that modify the Byte Array
@@ -131,4 +145,7 @@ void PyByteArray_Override(void)
     pop_index = replace_method("pop", (PyCFunction)bytearray_pop);
     // Extend: bytearray.extend('extend')
     extend_index = replace_method("extend", (PyCFunction)bytearray_extend);
+
+    //
+    (&PyByteArray_Type)->tp_dealloc = (destructor)bytearray_dealloc;
 }
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -405,7 +405,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
     BAT *b = NULL;
     node * argnode;
     int seengrp = FALSE;
-    PyObject *pArgs, *pResult = NULL; // this is going to be the parameter tuple
+    PyObject *pArgs = NULL, *pResult = NULL; // this is going to be the parameter tuple
     PyObject *code_object = NULL;
     BUN p = 0, q = 0;
     BATiter li;
@@ -492,14 +492,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
         }
     }
 
-
-    VERBOSE_MESSAGE("Formatting python code.\n");
-
-    pycall = FormatCode(exprStr, args, pci->argc, 4, &code_object);
-    if (pycall == NULL && code_object == NULL) {
-        throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
-    }
-
     //input analysis
     for (i = pci->retc + 2; i < pci->argc; i++) 
     {
@@ -734,6 +726,14 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
 #endif
     }
 
+
+    VERBOSE_MESSAGE("Formatting python code.\n");
+
+    pycall = FormatCode(exprStr, args, pci->argc, 4, &code_object);
+    if (pycall == NULL && code_object == NULL) {
+        throw(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
+    }
+    
     VERBOSE_MESSAGE("Loading data from the database into Python.\n");
 
     // Now we will do the input handling (aka converting the input BATs to numpy arrays)
@@ -977,7 +977,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                             j++;
                         }
                     }
-                    j = 0;
 
                     j = 0;
                     BATloop(b, p, q)
@@ -1005,7 +1004,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                                 msg = createException(MAL, "pyapi.eval", "Failed to create string.");
                                 goto wrapup;
                             }
-                            PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+                            PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j - t_start), obj);
+                            Py_DECREF(obj);
                         }
                         if (j == t_end) break;
                         j++;
@@ -1037,6 +1037,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                     const hge *t = (const hge *) BUNtail(li, p);
                     obj = PyLong_FromHge(*t);
                     PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+                    Py_DECREF(obj);
                     j++;
                 }
                 break;
@@ -1089,6 +1090,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
         }
     }
 
+
+
     VERBOSE_MESSAGE("Executing python code.\n");
 
     // Now it is time to actually execute the python code
@@ -1129,8 +1132,9 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
 
         // The function has been successfully created/compiled, all that remains is to actually call the function
         pResult = PyObject_CallObject(pFunc, pArgs);
+
+        Py_DECREF(pArgs);
         Py_DECREF(pFunc);
-        Py_DECREF(pArgs);
 
         if (PyErr_Occurred()) {
             msg = PyError_CreateException("Python exception", pycall);
@@ -1611,8 +1615,24 @@ returnvalues:
                         }    
                         break;
                     case NPY_OBJECT:
+                    {
                         //The resulting array is an array of pointers to various python objects
                         //Because the python objects can be of any size, we need to allocate a different size utf8_string for every object
+                        //we will first loop over all the objects to get the maximum size needed, so we only need to do one allocation
+                        size_t utf8_size = 64;
+                        for (iu = 0; iu < ret->count; iu++) {
+                            size_t size = 64;
+                            PyObject *obj;
+                            if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) continue;
+                            obj = *((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]);
+                            if (PyString_CheckExact(obj) || PyByteArray_CheckExact(obj)) {
+                                size = Py_SIZE(obj);
+                            } else if (PyUnicode_CheckExact(obj)) {
+                                size = Py_SIZE(obj) * 4;
+                            }
+                            if (size > utf8_size) utf8_size = size;
+                        }
+                        utf8_string = GDKzalloc(utf8_size);
                         for (iu = 0; iu < ret->count; iu++) {          
                             if (mask != NULL && (mask[index_offset * ret->count + iu]) == TRUE) {                
                                 b->T->nil = 1;    
@@ -1622,34 +1642,27 @@ returnvalues:
                                 PyObject *obj = *((PyObject**) &data[(index_offset * ret->count + iu) * ret->memory_size]);
                                 if (PyString_CheckExact(obj)) {
                                     char *str = ((PyStringObject*)obj)->ob_sval;
-                                    //printf("%s\n", str);
-                                    utf8_string = GDKzalloc(strlen(str) * 4);
                                     if (!string_copy(str, utf8_string, strlen(str) + 1)) {
                                         msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n");       
                                         goto wrapup;    
                                     }
                                 } else if (PyByteArray_CheckExact(obj)) {
                                     char *str = ((PyByteArrayObject*)obj)->ob_bytes;
-                                    //printf("%s\n", str);
-                                    utf8_string = GDKzalloc(strlen(str) * 4);
                                     if (!string_copy(str, utf8_string, strlen(str) + 1)) {
                                         msg = createException(MAL, "pyapi.eval", "Invalid string encoding used. Please return a regular ASCII string, or a Numpy_Unicode object.\n");       
                                         goto wrapup;    
                                     }
                                 } else if (PyUnicode_CheckExact(obj)) {
                                     Py_UNICODE *str = (Py_UNICODE*)((PyUnicodeObject*)obj)->str;
-                                    utf8_string = GDKzalloc(((PyUnicodeObject*)obj)->length * 4);
                                     utf32_to_utf8(0, ((PyUnicodeObject*)obj)->length, utf8_string, str);
                                 } else if (PyBool_Check(obj) || PyLong_Check(obj) || PyInt_Check(obj) || PyFloat_Check(obj)) { 
 #ifdef HAVE_HGE
                                     hge h;
                                     py_to_hge(obj, &h);
-                                    utf8_string = GDKzalloc(64);
                                     hge_to_string(utf8_string, h);
 #else
                                     lng h;
                                     py_to_lng(obj, &h);
-                                    utf8_string = GDKzalloc(32);
                                     lng_to_string(utf8_string, h);
 #endif
                                 } else {
@@ -1657,17 +1670,15 @@ returnvalues:
                                     goto wrapup; 
                                 }
                                 BUNappend(b, utf8_string, FALSE); 
-                                GDKfree(utf8_string);
                             }                                                  
     
                         }
                         break;
+                    }
                     default:
                         msg = createException(MAL, "pyapi.eval", "Unrecognized type. Could not convert to NPY_UNICODE.\n");       
                         goto wrapup;    
-                }                   
-                if (ret->result_type != NPY_OBJECT) {           
-                    GDKfree(utf8_string);   
-                }    
+                }                           
+                GDKfree(utf8_string);   
                                                     
                 b->T->nonil = 1 - b->T->nil;                                   
               
                 BATsetcount(b, ret->count);                                    
                 
diff --git a/monetdb5/extras/pyapi/type_conversion.c b/monetdb5/extras/pyapi/type_conversion.c
--- a/monetdb5/extras/pyapi/type_conversion.c
+++ b/monetdb5/extras/pyapi/type_conversion.c
@@ -67,7 +67,7 @@ void dbl_to_string(char* str, dbl value)
 int hge_to_string(char * str, hge x)
 {
     int i = 0;
-    size_t size = 2;
+    size_t size = 1;
     hge cpy = x > 0 ? x : -x;
     while(cpy > 0) {
         cpy /= 10;
@@ -80,7 +80,7 @@ int hge_to_string(char * str, hge x)
         str[0] = '-';
     }
     str[size - 1] = '\0';
-    i = size - 2;
+    i = size - 1;
     while(x > 0)
     {
         int v = x % 10;
diff --git a/monetdb5/extras/pyapi/unicode.c b/monetdb5/extras/pyapi/unicode.c
--- a/monetdb5/extras/pyapi/unicode.c
+++ b/monetdb5/extras/pyapi/unicode.c
@@ -90,6 +90,7 @@ bool utf32_to_utf8(size_t offset, size_t
         if (shift < 0) return false;
         position += shift;
     }
+    utf8_storage[position] = '\0';
     return true;
 }
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to