Changeset: 7d334306309a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/7d334306309a
Branch: iso
Log Message:

Merged with Jul2021


diffs (128 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -871,7 +871,40 @@ gdk_export size_t HEAPmemsize(Heap *h);
 gdk_export void HEAPdecref(Heap *h, bool remove);
 gdk_export void HEAPincref(Heap *h);
 
-/* BAT iterator, also protects use of BAT heaps with reference counts */
+/* BAT iterator, also protects use of BAT heaps with reference counts.
+ *
+ * A BAT iterator has to be used with caution, but it does have to be
+ * used in many places.
+ *
+ * An iterator is initialized by assigning it the result of a call to
+ * either bat_iterator or bat_iterator_nolock.  The former must be
+ * accompanied by a call to bat_iterator_end to release resources.
+ *
+ * bat_iterator should be used for BATs that could possibly be modified
+ * in another thread while we're reading the contents of the BAT.
+ * Alternatively, but only for very quick access, the theaplock can be
+ * taken, the data read, and the lock released.  For longer duration
+ * accesses, it is better to use the iterator, even without the BUNt*
+ * macros, since the theaplock is only held very briefly.
+ *
+ * If BATs are to be modified, higher level code must ensure that no
+ * other thread is going to modify the same BAT at the same time.  A
+ * to-be-modified BAT should not use bat_iterator.  It can use
+ * bat_iterator_nolock, but be aware that this creates a copy of the
+ * heap pointer(s) (i.e. theap and tvheap) and if the heaps get
+ * extended, the pointers in the BAT structure may be modified, but that
+ * does not modify the pointers in the iterator.  This means that after
+ * operations that may grow a heap, the iterator should be
+ * reinitialized.
+ *
+ * The BAT iterator provides a number of fields that can (and often
+ * should) be used to access information about the BAT.  For string
+ * BATs, if a parallel thread adds values, the offset heap (theap) may
+ * get replaced by one that is wider.  This involves changing the
+ * twidth and tshift values in the BAT structure.  These changed values
+ * should not be used to access the data in the iterator.  Instead, use
+ * the width and shift values in the iterator itself.
+ */
 typedef struct BATiter {
        BAT *b;
        Heap *h;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1448,9 +1448,9 @@ BUNinplacemulti(BAT *b, const oid *posit
                                        MT_rwlock_wrunlock(&b->thashlock);
                                        return GDK_FAIL;
                                }
-                               /* reinitialize iterator after heap upgrade */
-                               bi = bat_iterator_nolock(b);
                        }
+                       /* reinitialize iterator after possible heap upgrade */
+                       bi = bat_iterator_nolock(b);
                        _ptr = BUNtloc(bi, p);
                        switch (b->twidth) {
                        default:        /* only three or four cases possible */
diff --git a/gdk/gdk_logger.c b/gdk/gdk_logger.c
--- a/gdk/gdk_logger.c
+++ b/gdk/gdk_logger.c
@@ -331,11 +331,13 @@ log_read_id(logger *lg, log_id *id)
 static log_return
 string_reader(logger *lg, BAT *b, lng nr)
 {
-       int sz = 0;
+       size_t sz = 0;
+       lng SZ = 0;
        log_return res = LOG_OK;
 
-       if (mnstr_readInt(lg->input_log, &sz) != 1)
+       if (mnstr_readLng(lg->input_log, &SZ) != 1)
                return LOG_EOF;
+       sz = (size_t)SZ;
        char *buf = GDKmalloc(sz);
 
        if (!buf || mnstr_read(lg->input_log, buf, sz, 1) != 1) {
@@ -2347,12 +2349,13 @@ string_writer(logger *lg, BAT *b, lng of
        if (buf) {
                for(BUN p = (BUN)offset; p < end; p++) {
                        char *s = BUNtail(bi, p);
-                       strcpy(dst, s);
-                       dst += strlen(s)+1;
+                       size_t len = strlen(s)+1;
+                       memcpy(dst, s, len);
+                       dst += len;
                }
        }
        gdk_return res = GDK_FAIL;
-       if (buf && mnstr_writeInt(lg->output_log, (int) sz) && mnstr_write(lg->output_log, buf, sz, 1) == 1)
+       if (buf && mnstr_writeLng(lg->output_log, (lng) sz) && mnstr_write(lg->output_log, buf, sz, 1) == 1)
                res = GDK_SUCCEED;
        GDKfree(buf);
        bat_iterator_end(&bi);
diff --git a/monetdb5/modules/mal/manifold.c b/monetdb5/modules/mal/manifold.c
--- a/monetdb5/modules/mal/manifold.c
+++ b/monetdb5/modules/mal/manifold.c
@@ -135,7 +135,7 @@ typedef struct{
                        break;                                                  
                                                        \
                }                                                               
                                                                \
                }                                                               
                                                                \
-               mut->args[0].b->theap->dirty = true;                            
                                \
+               mut->args[0].b->theap->dirty = true;                            
                        \
        } while (0)
 
 // single argument is preparatory step for GDK_mapreduce
@@ -331,7 +331,7 @@ MANIFOLDevaluate(Client cntxt, MalBlkPtr
        mat[0].b->tnonil=false;
        mat[0].b->tsorted=false;
        mat[0].b->trevsorted=false;
-       mat[0].bi = bat_iterator_nolock(mat[0].b);
+       mat[0].bi = (BATiter) {.b = NULL,};
        mat[0].first = (void *)  Tloc(mat[0].b, 0);
        mat[0].last = (void *)  Tloc(mat[0].b, BUNlast(mat[0].b));
 
diff --git a/monetdb5/modules/mal/tablet.c b/monetdb5/modules/mal/tablet.c
--- a/monetdb5/modules/mal/tablet.c
+++ b/monetdb5/modules/mal/tablet.c
@@ -1787,6 +1787,7 @@ SQLload_file(Client cntxt, Tablet *as, b
                                if (as->format[attr].skip)
                                        continue;
                                width = as->format[attr].c->twidth;
+                               as->format[attr].ci = bat_iterator_nolock(as->format[attr].c);
                                switch (width){
                                case 1:
                                        trimerrors(bte);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to