Changeset: 4110d3fb15c4 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4110d3fb15c4
Modified Files:
        gdk/gdk.h
        gdk/gdk_bat.c
        monetdb5/modules/mal/manifold.c
        monetdb5/modules/mal/tablet.c
Branch: Jul2021
Log Message:

Reinitialize bat iterators if the heaps may have been extended.


diffs (92 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -871,7 +871,40 @@ gdk_export size_t HEAPmemsize(Heap *h);
 gdk_export void HEAPdecref(Heap *h, bool remove);
 gdk_export void HEAPincref(Heap *h);
 
-/* BAT iterator, also protects use of BAT heaps with reference counts */
+/* BAT iterator, also protects use of BAT heaps with reference counts.
+ *
+ * A BAT iterator has to be used with caution, but it does have to be
+ * used in many place.
+ *
+ * An iterator is initialized by assigning it the result of a call to
+ * either bat_iterator or bat_iterator_nolock.  The former must be
+ * accompanied by a call to bat_iterator_end to release resources.
+ *
+ * bat_iterator should be used for BATs that could possibly be modified
+ * in another thread while we're reading the contents of the BAT.
+ * Alternatively, but only for very quick access, the theaplock can be
+ * taken, the data read, and the lock released.  For longer duration
+ * accesses, it is better to use the iterator, even without the BUNt*
+ * macros, since the theaplock is only held very briefly.
+ *
+ * If BATs are to be modified, higher level code must assure that no
+ * other thread is going to modify the same BAT at the same time.  A
+ * to-be-modified BAT should not use bat_iterator.  It can use
+ * bat_iterator_nolock, but be aware that this creates a copy of the
+ * heap pointer(s) (i.e. theap and tvheap) and if the heaps get
+ * extended, the pointers in the BAT structure may be modified, but that
+ * does not modify the pointers in the iterator.  This means that after
+ * operations that may grow a heap, the iterator should be
+ * reinitialized.
+ *
+ * The BAT iterator provides a number of fields that can (and often
+ * should) be used to access information about the BAT.  For string
+ * BATs, if a parallel threads adds values, the offset heap (theap) may
+ * get replaced by a one that is wider.  This involves changing the
+ * twidth and tshift values in the BAT structure.  These changed values
+ * should not be used to access the data in the iterator.  Instead, use
+ * the width and shift values in the iterator itself.
+ */
 typedef struct BATiter {
        BAT *b;
        Heap *h;
diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c
--- a/gdk/gdk_bat.c
+++ b/gdk/gdk_bat.c
@@ -1448,9 +1448,9 @@ BUNinplacemulti(BAT *b, const oid *posit
                                        MT_rwlock_wrunlock(&b->thashlock);
                                        return GDK_FAIL;
                                }
-                               /* reinitialize iterator after heap upgrade */
-                               bi = bat_iterator_nolock(b);
                        }
+                       /* reinitialize iterator after possible heap upgrade */
+                       bi = bat_iterator_nolock(b);
                        _ptr = BUNtloc(bi, p);
                        switch (b->twidth) {
                        default:        /* only three or four cases possible */
diff --git a/monetdb5/modules/mal/manifold.c b/monetdb5/modules/mal/manifold.c
--- a/monetdb5/modules/mal/manifold.c
+++ b/monetdb5/modules/mal/manifold.c
@@ -135,7 +135,7 @@ typedef struct{
                        break;                                                  
                                                        \
                }                                                               
                                                                \
                }                                                               
                                                                \
-               mut->args[0].b->theap->dirty = true;                            
                                \
+               mut->args[0].b->theap->dirty = true;                            
                        \
        } while (0)
 
 // single argument is preparatory step for GDK_mapreduce
@@ -331,7 +331,7 @@ MANIFOLDevaluate(Client cntxt, MalBlkPtr
        mat[0].b->tnonil=false;
        mat[0].b->tsorted=false;
        mat[0].b->trevsorted=false;
-       mat[0].bi = bat_iterator_nolock(mat[0].b);
+       mat[0].bi = (BATiter) {.b = NULL,};
        mat[0].first = (void *)  Tloc(mat[0].b, 0);
        mat[0].last = (void *)  Tloc(mat[0].b, BUNlast(mat[0].b));
 
diff --git a/monetdb5/modules/mal/tablet.c b/monetdb5/modules/mal/tablet.c
--- a/monetdb5/modules/mal/tablet.c
+++ b/monetdb5/modules/mal/tablet.c
@@ -1787,6 +1787,7 @@ SQLload_file(Client cntxt, Tablet *as, b
                                if (as->format[attr].skip)
                                        continue;
                                width = as->format[attr].c->twidth;
+                               as->format[attr].ci = 
bat_iterator_nolock(as->format[attr].c);
                                switch (width){
                                case 1:
                                        trimerrors(bte);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to