Changeset: b134f16b9d4b for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b134f16b9d4b Modified Files: gdk/gdk_calc.c Branch: default Log Message:
Significantly speed up calculation of average of integral types. We now calculate the sum of the values until that doesn't fit into a lng anymore. Only then do we switch over to the much slower method that calculates the running average. diffs (110 lines): diff --git a/gdk/gdk_calc.c b/gdk/gdk_calc.c --- a/gdk/gdk_calc.c +++ b/gdk/gdk_calc.c @@ -9741,23 +9741,75 @@ VARconvert(ValPtr ret, const ValRecord * /* ---------------------------------------------------------------------- */ /* average (any numeric type) */ -#define AVERAGE_TYPE(TYPE) \ - do { \ - TYPE a = 0, x; \ - for (i = start; i < end; i++) { \ - if (cand) { \ - if (i < *cand - b->H->seq) \ - continue; \ - assert(i == *cand - b->H->seq); \ - if (++cand == candend) \ - end = i + 1; \ - } \ - x = ((const TYPE *) src)[i]; \ - if (x == TYPE##_nil) \ - continue; \ - AVERAGE_ITER(TYPE, x, a, r, n); \ - } \ - *avg = n > 0 ? a + (dbl) r / n : dbl_nil; \ +#define AVERAGE_TYPE(TYPE) \ + do { \ + TYPE x, a; \ + \ + /* first try to calculate the sum of all values into a */ \ + /* lng */ \ + for (i = start; i < end; i++) { \ + if (cand) { \ + if (i < *cand - b->H->seq) { \ + continue; \ + } \ + assert(i == *cand - b->H->seq); \ + if (++cand == candend) \ + end = i + 1; \ + } \ + x = ((const TYPE *) src)[i]; \ + if (x == TYPE##_nil) \ + continue; \ + ADD_WITH_CHECK(TYPE, x, \ + lng, sum, \ + lng, sum, \ + goto overflow##TYPE); \ + /* don't count value until after overflow check */ \ + n++; \ + } \ + /* the sum fit, so now we can calculate the average */ \ + *avg = (dbl) sum / n; \ + if (0) { \ + overflow##TYPE: \ + /* we get here if sum(x[0],...,x[i]) doesn't */ \ + /* fit in a lng but sum(x[0],...,x[i-1]) did */ \ + /* the variable sum contains that sum */ \ + /* the rest of the calculation is done */ \ + /* according to the loop invariant described */ \ + /* in the below loop */ \ + if (sum >= 0) { \ + a = (TYPE) (sum / (lng) n); /* this fits */ \ + r = (BUN) (sum % (SBUN) n); \ + } else { \ + sum = -sum; \ + a = - (TYPE) (sum / (lng) n); /* this fits */ \ + r = (BUN) (sum % (SBUN) n); \ + if (r) { \ + a--; \ + r = n - r; \ + } \ + } \ + if (cand) \ + --cand; \ + \ + for (; i < end; i++) { \ + /* loop invariant: */ \ + /* a + r/n == average(x[0],...,x[n]); */ \ + /* 0 <= r < n (if n > 0) */ \ + /* or if n == 0: a == 0; r == 0 */ \ + if (cand) { \ + if (i < *cand - b->H->seq) \ + continue; \ + assert(i == *cand - b->H->seq); \ + if (++cand == candend) \ + end = i + 1; \ + } \ + x = ((const TYPE *) src)[i]; \ + if (x == TYPE##_nil) \ + continue; \ + AVERAGE_ITER(TYPE, x, a, r, n); \ + } \ + *avg = n > 0 ? a + (dbl) r / n : dbl_nil; \ + } \ } while (0) #define AVERAGE_FLOATTYPE(TYPE) \ @@ -9784,9 +9836,13 @@ int BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN *vals) { BUN n = 0, r = 0, i = 0; + lng sum = 0; BUN start, end, cnt; const oid *cand = NULL, *candend = NULL; const void *src; + /* these two needed for ADD_WITH_CHECK macro */ + int abort_on_error = 1; + BUN nils = 0; CANDINIT(b, s); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list