Changeset: ce23fa286bb6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=ce23fa286bb6
Modified Files:
	gdk/gdk_join.c
Branch: Jun2020
Log Message:
Ignore the cost of creating a hash when the bat is persistent.
The idea is that the hash is going to be used a lot of times, so the
cost of creating it once doesn't matter too much.

diffs (67 lines):

diff --git a/gdk/gdk_join.c b/gdk/gdk_join.c
--- a/gdk/gdk_join.c
+++ b/gdk/gdk_join.c
@@ -3162,7 +3162,13 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
 		/* no hash table, so cost includes time to build the
 		 * hash table (single scan) plus the time to do the
 		 * lookups (also single scan, we assume some chains) */
-		rcost = (double) rci.ncand * 2 + lci.ncand * 1.1;
+		rcost = lci.ncand * 1.1;
+#ifdef PERSISTENTHASH
+		/* only count the cost of creating the hash for
+		 * non-persistent bats */
+		if (rci.ncand != BATcount(r) || !(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap.dirty || GDKinmemory())
+#endif
+			rcost += rci.ncand * 2.0;
 	} else {
 		if (rci.noids > 0) {
 			/* if we need to do binary search on candidate
@@ -3205,7 +3211,13 @@ leftjoin(BAT **r1p, BAT **r2p, BAT *l, B
 			/* no hash table, so cost includes time to build the
 			 * hash table (single scan) plus the time to do the
 			 * lookups (also single scan, we assume some chains) */
-			lcost = (double) lci.ncand * 2 + rci.ncand * 1.1;
+			lcost = rci.ncand * 1.1;
+#ifdef PERSISTENTHASH
+			/* only count the cost of creating the hash
+			 * for non-persistent bats */
+			if (lci.ncand != BATcount(l) || !(BBP_status(l->batCacheid) & BBPEXISTING) || l->theap.dirty || GDKinmemory())
+#endif
+				lcost += lci.ncand * 2.0;
 		} else {
 			if (lci.noids > 0) {
 				/* if we need to do binary search on candidate
@@ -3501,9 +3513,13 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
 		/* no hash table, so cost includes time to build the
 		 * hash table (single scan) plus the time to do the
 		 * lookups (also single scan, we assume some chains) */
-		lcost = (double) lci.ncand * 2 + rci.ncand * 1.1;
-		if (lci.ncand == BATcount(l) && !l->batTransient)
-			lcost *= 0.8;
+		lcost = rci.ncand * 1.1;
+#ifdef PERSISTENTHASH
+		/* only count the cost of creating the hash for
+		 * non-persistent bats */
+		if (lci.ncand != BATcount(l) || !(BBP_status(l->batCacheid) & BBPEXISTING) || l->theap.dirty || GDKinmemory())
+#endif
+			lcost += lci.ncand * 2.0;
 	} else {
 		if (lci.noids > 0) {
 			/* if we need to do binary search on candidate
@@ -3542,9 +3558,13 @@ BATjoin(BAT **r1p, BAT **r2p, BAT *l, BA
 		/* no hash table, so cost includes time to build the
 		 * hash table (single scan) plus the time to do the
 		 * lookups (also single scan, we assume some chains) */
-		rcost = (double) rci.ncand * 2 + lci.ncand * 1.1;
-		if (rci.ncand == BATcount(r) && !r->batTransient)
-			rcost *= 0.8;
+		rcost = lci.ncand * 1.1;
+#ifdef PERSISTENTHASH
+		/* only count the cost of creating the hash for
+		 * non-persistent bats */
+		if (rci.ncand != BATcount(r) || !(BBP_status(r->batCacheid) & BBPEXISTING) || r->theap.dirty || GDKinmemory())
+#endif
+			rcost += rci.ncand * 2.0;
 	} else {
 		if (rci.noids > 0) {
 			/* if we need to do binary search on candidate
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list