Changeset: d57aa09d76a0 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/d57aa09d76a0 Modified Files: gdk/gdk_strimps.c Branch: string_imprints Log Message:
Avoid building the strimp if there are not enough pairs. If there are not at least STRIMP_HEADER_SIZE different pairs in the pair histogram, stop the strimp construction early. This can happen in a column that is mostly numbers or symbols, for example, since these are ignored during the histogram construction process. This fixes a crash that we observed on the TPC-DS benchmark query 91. diffs (49 lines): diff --git a/gdk/gdk_strimps.c b/gdk/gdk_strimps.c --- a/gdk/gdk_strimps.c +++ b/gdk/gdk_strimps.c @@ -253,6 +253,7 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai PairIterator pi, *pip; CharPair cp, *cpp; struct canditer ci; + size_t values = 0; TRC_DEBUG_IF(ACCELERATOR) t0 = GDKusec(); @@ -308,6 +309,7 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai #endif hist[hidx].cnt++; if (hist[hidx].p == NULL) { + values++; hist[hidx].p = (CharPair *)GDKmalloc(sizeof(CharPair)); hist[hidx].p->psize = cpp->psize; hist[hidx].p->pbytes = cpp->pbytes; @@ -319,18 +321,22 @@ STRMPbuildHeader(BAT *b, BAT *s, CharPai } bat_iterator_end(&bi); - // Choose the header pairs - STRMPchoosePairs(hist, hlen, hpairs); - for(hidx = 0; hidx < hlen; hidx++) { - if(hist[hidx].p) { - GDKfree(hist[hidx].p); - hist[hidx].p = NULL; + // Check that we did record something in the histogram. + if(values >= STRIMP_HEADER_SIZE) { + // Choose the header pairs + STRMPchoosePairs(hist, hlen, hpairs); + for (hidx = 0; hidx < hlen; hidx++) { + if (hist[hidx].p) { + GDKfree(hist[hidx].p); + hist[hidx].p = NULL; + } } } + GDKfree(hist); TRC_DEBUG(ACCELERATOR, LLFMT " usec\n", GDKusec() - t0); - return true; + return values >= STRIMP_HEADER_SIZE; } /* Create the heap for a string imprint. Returns NULL on failure. This _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list