Changeset: f520fd3fd555 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/f520fd3fd555
Modified Files:
	gdk/gdk_atoms.c
	gdk/gdk_hash.c
	gdk/gdk_hash.h
	sql/test/BugTracker-2023/Tests/misc-crashes-7390.test
Branch: Dec2023
Log Message:
Floating point types should not use the bit patterns for hashing. Since 0.0 == -0.0, but 0.0 and -0.0 have different bit representations, we can't use int/lng hashes to calculate hashes for flt/dbl. Also, there are multiple representations for NaN (which we use for NULL) that all have to map to the same hash value. diffs (175 lines): diff --git a/gdk/gdk_atoms.c b/gdk/gdk_atoms.c --- a/gdk/gdk_atoms.c +++ b/gdk/gdk_atoms.c @@ -124,6 +124,26 @@ hgeHash(const hge *v) } #endif +static BUN +fltHash(const flt *v) +{ + if (is_flt_nil(*v)) + return (BUN) mix_int(GDK_int_min); + if (*v == 0) + return (BUN) mix_int(0); + return (BUN) mix_int(*(const unsigned int *) v); +} + +static BUN +dblHash(const dbl *v) +{ + if (is_dbl_nil(*v)) + return (BUN) mix_lng(GDK_lng_min); + if (*v == 0) + return (BUN) mix_lng(0); + return (BUN) mix_lng(*(const ulng *) v); +} + /* * @+ Standard Atoms */ @@ -1786,7 +1806,7 @@ atomDesc BATatoms[MAXATOMS] = { .atomRead = (void *(*)(void *, size_t *, stream *, size_t)) fltRead, .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) fltWrite, .atomCmp = (int (*)(const void *, const void *)) fltCmp, - .atomHash = (BUN (*)(const void *)) intHash, + .atomHash = (BUN (*)(const void *)) fltHash, }, [TYPE_dbl] = { .name = "dbl", @@ -1799,7 +1819,7 @@ atomDesc BATatoms[MAXATOMS] = { .atomRead = (void *(*)(void *, size_t *, stream *, size_t)) dblRead, .atomWrite = (gdk_return (*)(const void *, stream *, size_t)) dblWrite, .atomCmp = (int (*)(const void *, const void *)) dblCmp, - .atomHash = (BUN (*)(const void *)) lngHash, + .atomHash = (BUN (*)(const void *)) dblHash, }, [TYPE_lng] = { .name = "lng", diff --git a/gdk/gdk_hash.c b/gdk/gdk_hash.c --- a/gdk/gdk_hash.c +++ b/gdk/gdk_hash.c @@ -102,10 +102,12 @@ HASHclear(Hash *h) memset(h->Bckt, 0xFF, h->nbucket * h->width); } -#define HASH_VERSION 5 -/* this is only for the change of hash function of the UUID type and MBR - * type; if HASH_VERSION is increased again from 5, the code associated - * 
with HASH_VERSION_NOUUID and HASH_VERSION_NOMBR must be deleted */ +#define HASH_VERSION 6 +/* this is only for the change of hash function of the floating point + * types, the UUID type and the MBR type; if HASH_VERSION is increased + * again from 6, the code associated with HASH_VERSION_NOUUID and + * HASH_VERSION_NOMBR must be deleted */ +#define HASH_VERSION_FLOAT 5 #define HASH_VERSION_NOMBR 4 #define HASH_VERSION_NOUUID 3 #define HASH_HEADER_SIZE 7 /* nr of size_t fields in header */ @@ -509,6 +511,8 @@ BATcheckhash(BAT *b) ((size_t) 1 << 24) | #endif HASH_VERSION_NOUUID) && + strcmp(ATOMname(b->ttype), "flt") != 0 && + strcmp(ATOMname(b->ttype), "dbl") != 0 && strcmp(ATOMname(b->ttype), "uuid") != 0 && strcmp(ATOMname(b->ttype), "mbr") != 0) #endif @@ -519,8 +523,20 @@ BATcheckhash(BAT *b) ((size_t) 1 << 24) | #endif HASH_VERSION_NOMBR) && + strcmp(ATOMname(b->ttype), "flt") != 0 && + strcmp(ATOMname(b->ttype), "dbl") != 0 && strcmp(ATOMname(b->ttype), "mbr") != 0) #endif +#ifdef HASH_VERSION_FLOAT + /* if not floating point, also allow previous version */ + || (hdata[0] == ( +#ifdef PERSISTENTHASH + ((size_t) 1 << 24) | +#endif + HASH_VERSION_FLOAT) && + strcmp(ATOMname(b->ttype), "flt") != 0 && + strcmp(ATOMname(b->ttype), "dbl") != 0) +#endif ) && hdata[1] > 0 && ( diff --git a/gdk/gdk_hash.h b/gdk/gdk_hash.h --- a/gdk/gdk_hash.h +++ b/gdk/gdk_hash.h @@ -210,8 +210,8 @@ HASHgetlink(const Hash *h, BUN i) #define hash_oid(H,V) hash_lng(H,V) #endif -#define hash_flt(H,V) hash_int(H,V) -#define hash_dbl(H,V) hash_lng(H,V) +#define hash_flt(H,V) HASHbucket(H, ATOMhash(TYPE_flt, (V))) +#define hash_dbl(H,V) HASHbucket(H, ATOMhash(TYPE_dbl, (V))) static inline BUN __attribute__((__pure__)) mix_uuid(const uuid *u) diff --git a/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test b/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test --- a/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test +++ b/sql/test/BugTracker-2023/Tests/misc-crashes-7390.test @@ -322,7 
+322,7 @@ DROP TABLE v0 statement ok CREATE TABLE v0 (v1 INTEGER PRIMARY KEY) -query I nosort +query R nosort SELECT 67 + 0 + -1 + 96 + 46463082.000000 + 30 AS v2 FROM v0 WHERE 255 = v1 LIMIT 66 OFFSET 16 ---- @@ -347,16 +347,16 @@ CREATE TABLE v0(v1 FLOAT) statement ok INSERT INTO v0 VALUES (0),(67),(127),(-1),(NULL),(NULL),(NULL),(NULL) -query IT nosort +query RT nosort SELECT * , 'x' FROM v0 WHERE (SELECT 39 WHERE (v1 + -32768 NOT IN (14, 255))) * 87 + 2147483647 ---- -0 +0.000 x -67 +67.000 x -127 +127.000 x --1 +-1.000 x statement ok @@ -367,20 +367,11 @@ SELECT count(*) FROM v0 ---- 131080 --- after the second insert, rerun same query as above, now it crashes server with assertion failure --- gdk/gdk_bat.c:2098: BATsetcount: Assertion `b->batCapacity >= cnt' failed. -skipif knownfail -query IT nosort +-- after the second insert, rerun same query as above, since it used to crash the server +query RT nosort SELECT * , 'x' FROM v0 WHERE (SELECT 39 WHERE (v1 + -32768 NOT IN (14, 255))) * 87 + 2147483647 ---- -0 -x -67 -x -127 -x --1 -x +131080 values hashing to 6b7d0af1bd7606b28955e7b023646d9d -- mul_bte_bte_bte: ERROR: 22003!overflow in calculation 87*39. @@ -576,3 +567,4 @@ DELETE FROM v0 WHERE EXISTS ( SELECT 2 F statement ok DROP TABLE v0 + _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org