Changeset: d57aa09d76a0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d57aa09d76a0
Modified Files:
        gdk/gdk_strimps.c
Branch: string_imprints
Log Message:

Avoid building the strimp if there are not enough pairs

If the pair histogram contains fewer than STRIMP_HEADER_SIZE distinct
pairs, stop the strimp construction early. This can happen, for
example, in a column that is mostly numbers or symbols, since these
are ignored during the histogram construction process.

This fixes a crash that we observed on query 91 of the TPC-DS benchmark.


diffs (49 lines):

diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c
--- a/gdk/gdk_strimps.c
+++ b/gdk/gdk_strimps.c
@@ -253,6 +253,7 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
        PairIterator pi, *pip;
        CharPair cp, *cpp;
        struct canditer ci;
+       size_t values = 0;
 
 
        TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec();
@@ -308,6 +309,7 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
 #endif
                                        hist[hidx].cnt++;
                                        if (hist[hidx].p == NULL) {
+                                               values++;
                                                hist[hidx].p = (CharPair 
*)GDKmalloc(sizeof(CharPair));
                                                hist[hidx].p->psize = 
cpp->psize;
                                                hist[hidx].p->pbytes = 
cpp->pbytes;
@@ -319,18 +321,22 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai
        }
        bat_iterator_end(&bi);
 
-       // Choose the header pairs
-       STRMPchoosePairs(hist, hlen, hpairs);
-       for(hidx = 0; hidx < hlen; hidx++) {
-               if(hist[hidx].p) {
-                       GDKfree(hist[hidx].p);
-                       hist[hidx].p = NULL;
+       // Check that we did record something in the histogram.
+       if(values >= STRIMP_HEADER_SIZE) {
+               // Choose the header pairs
+               STRMPchoosePairs(hist, hlen, hpairs);
+               for (hidx = 0; hidx < hlen; hidx++) {
+                       if (hist[hidx].p) {
+                               GDKfree(hist[hidx].p);
+                               hist[hidx].p = NULL;
+                       }
                }
        }
+
        GDKfree(hist);
 
        TRC_DEBUG(ACCELERATOR, LLFMT " usec\n", GDKusec() - t0);
-       return true;
+       return values >= STRIMP_HEADER_SIZE;
 }
 
 /* Create the heap for a string imprint. Returns NULL on failure. This
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to