Changeset: 4110d3fb15c4 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/4110d3fb15c4 Modified Files: gdk/gdk.h gdk/gdk_bat.c monetdb5/modules/mal/manifold.c monetdb5/modules/mal/tablet.c Branch: Jul2021 Log Message:
Reinitialize bat iterators if the heaps may have been extended. diffs (92 lines): diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -871,7 +871,40 @@ gdk_export size_t HEAPmemsize(Heap *h); gdk_export void HEAPdecref(Heap *h, bool remove); gdk_export void HEAPincref(Heap *h); -/* BAT iterator, also protects use of BAT heaps with reference counts */ +/* BAT iterator, also protects use of BAT heaps with reference counts. + * + * A BAT iterator has to be used with caution, but it does have to be + * used in many place. + * + * An iterator is initialized by assigning it the result of a call to + * either bat_iterator or bat_iterator_nolock. The former must be + * accompanied by a call to bat_iterator_end to release resources. + * + * bat_iterator should be used for BATs that could possibly be modified + * in another thread while we're reading the contents of the BAT. + * Alternatively, but only for very quick access, the theaplock can be + * taken, the data read, and the lock released. For longer duration + * accesses, it is better to use the iterator, even without the BUNt* + * macros, since the theaplock is only held very briefly. + * + * If BATs are to be modified, higher level code must assure that no + * other thread is going to modify the same BAT at the same time. A + * to-be-modified BAT should not use bat_iterator. It can use + * bat_iterator_nolock, but be aware that this creates a copy of the + * heap pointer(s) (i.e. theap and tvheap) and if the heaps get + * extended, the pointers in the BAT structure may be modified, but that + * does not modify the pointers in the iterator. This means that after + * operations that may grow a heap, the iterator should be + * reinitialized. + * + * The BAT iterator provides a number of fields that can (and often + * should) be used to access information about the BAT. For string + * BATs, if a parallel threads adds values, the offset heap (theap) may + * get replaced by a one that is wider. This involves changing the + * twidth and tshift values in the BAT structure. These changed values + * should not be used to access the data in the iterator. Instead, use + * the width and shift values in the iterator itself. + */ typedef struct BATiter { BAT *b; Heap *h; diff --git a/gdk/gdk_bat.c b/gdk/gdk_bat.c --- a/gdk/gdk_bat.c +++ b/gdk/gdk_bat.c @@ -1448,9 +1448,9 @@ BUNinplacemulti(BAT *b, const oid *posit MT_rwlock_wrunlock(&b->thashlock); return GDK_FAIL; } - /* reinitialize iterator after heap upgrade */ - bi = bat_iterator_nolock(b); } + /* reinitialize iterator after possible heap upgrade */ + bi = bat_iterator_nolock(b); _ptr = BUNtloc(bi, p); switch (b->twidth) { default: /* only three or four cases possible */ diff --git a/monetdb5/modules/mal/manifold.c b/monetdb5/modules/mal/manifold.c --- a/monetdb5/modules/mal/manifold.c +++ b/monetdb5/modules/mal/manifold.c @@ -135,7 +135,7 @@ typedef struct{ break; \ } \ } \ - mut->args[0].b->theap->dirty = true; \ + mut->args[0].b->theap->dirty = true; \ } while (0) // single argument is preparatory step for GDK_mapreduce @@ -331,7 +331,7 @@ MANIFOLDevaluate(Client cntxt, MalBlkPtr mat[0].b->tnonil=false; mat[0].b->tsorted=false; mat[0].b->trevsorted=false; - mat[0].bi = bat_iterator_nolock(mat[0].b); + mat[0].bi = (BATiter) {.b = NULL,}; mat[0].first = (void *) Tloc(mat[0].b, 0); mat[0].last = (void *) Tloc(mat[0].b, BUNlast(mat[0].b)); diff --git a/monetdb5/modules/mal/tablet.c b/monetdb5/modules/mal/tablet.c --- a/monetdb5/modules/mal/tablet.c +++ b/monetdb5/modules/mal/tablet.c @@ -1787,6 +1787,7 @@ SQLload_file(Client cntxt, Tablet *as, b if (as->format[attr].skip) continue; width = as->format[attr].c->twidth; + as->format[attr].ci = bat_iterator_nolock(as->format[attr].c); switch (width){ case 1: trimerrors(bte); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list