Changeset: b134f16b9d4b for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b134f16b9d4b
Modified Files:
        gdk/gdk_calc.c
Branch: default
Log Message:

Significantly speed up calculation of average of integral types.
We now calculate the sum of the values until that doesn't fit into a
lng anymore.  Only then do we switch over to the much slower method
that calculates the running average.


diffs (110 lines):

diff --git a/gdk/gdk_calc.c b/gdk/gdk_calc.c
--- a/gdk/gdk_calc.c
+++ b/gdk/gdk_calc.c
@@ -9741,23 +9741,75 @@ VARconvert(ValPtr ret, const ValRecord *
 /* ---------------------------------------------------------------------- */
 /* average (any numeric type) */
 
-#define AVERAGE_TYPE(TYPE)                                     \
-       do {                                                    \
-               TYPE a = 0, x;                                  \
-               for (i = start; i < end; i++) {                 \
-                       if (cand) {                             \
-                               if (i < *cand - b->H->seq)      \
-                                       continue;               \
-                               assert(i == *cand - b->H->seq); \
-                               if (++cand == candend)          \
-                                       end = i + 1;            \
-                       }                                       \
-                       x = ((const TYPE *) src)[i];            \
-                       if (x == TYPE##_nil)                    \
-                               continue;                       \
-                       AVERAGE_ITER(TYPE, x, a, r, n);         \
-               }                                               \
-               *avg = n > 0 ? a + (dbl) r / n : dbl_nil;       \
+#define AVERAGE_TYPE(TYPE)                                             \
+       do {                                                            \
+               TYPE x, a;                                              \
+                                                                       \
+               /* first try to calculate the sum of all values into a */ \
+               /* lng */                                               \
+               for (i = start; i < end; i++) {                         \
+                       if (cand) {                                     \
+                               if (i < *cand - b->H->seq) {            \
+                                       continue;                       \
+                               }                                       \
+                               assert(i == *cand - b->H->seq);         \
+                               if (++cand == candend)                  \
+                                       end = i + 1;                    \
+                       }                                               \
+                       x = ((const TYPE *) src)[i];                    \
+                       if (x == TYPE##_nil)                            \
+                               continue;                               \
+                       ADD_WITH_CHECK(TYPE, x,                         \
+                                      lng, sum,                        \
+                                      lng, sum,                        \
+                                      goto overflow##TYPE);            \
+                       /* don't count value until after overflow check */ \
+                       n++;                                            \
+               }                                                       \
+               /* the sum fit, so now we can calculate the average */  \
+               *avg = (dbl) sum / n;                                   \
+               if (0) {                                                \
+                 overflow##TYPE:                                       \
+                       /* we get here if sum(x[0],...,x[i]) doesn't */ \
+                       /* fit in a lng but sum(x[0],...,x[i-1]) did */ \
+                       /* the variable sum contains that sum */        \
+                       /* the rest of the calculation is done */       \
+                       /* according to the loop invariant described */ \
+                       /* in the below loop */                         \
+                       if (sum >= 0) {                                 \
+                               a = (TYPE) (sum / (lng) n); /* this fits */ \
+                               r = (BUN) (sum % (SBUN) n);             \
+                       } else {                                        \
+                               sum = -sum;                             \
+                               a = - (TYPE) (sum / (lng) n); /* this fits */ \
+                               r = (BUN) (sum % (SBUN) n);             \
+                               if (r) {                                \
+                                       a--;                            \
+                                       r = n - r;                      \
+                               }                                       \
+                       }                                               \
+                       if (cand)                                       \
+                               --cand;                                 \
+                                                                       \
+                       for (; i < end; i++) {                          \
+                               /* loop invariant: */                   \
+                               /* a + r/n == average(x[0],...,x[n]); */ \
+                               /* 0 <= r < n (if n > 0) */             \
+                               /* or if n == 0: a == 0; r == 0 */      \
+                               if (cand) {                             \
+                                       if (i < *cand - b->H->seq)      \
+                                               continue;               \
+                                       assert(i == *cand - b->H->seq); \
+                                       if (++cand == candend)          \
+                                               end = i + 1;            \
+                               }                                       \
+                               x = ((const TYPE *) src)[i];            \
+                               if (x == TYPE##_nil)                    \
+                                       continue;                       \
+                               AVERAGE_ITER(TYPE, x, a, r, n);         \
+                       }                                               \
+                       *avg = n > 0 ? a + (dbl) r / n : dbl_nil;       \
+               }                                                       \
        } while (0)
 
 #define AVERAGE_FLOATTYPE(TYPE)                                        \
@@ -9784,9 +9836,13 @@ int
 BATcalcavg(BAT *b, BAT *s, dbl *avg, BUN *vals)
 {
        BUN n = 0, r = 0, i = 0;
+       lng sum = 0;
        BUN start, end, cnt;
        const oid *cand = NULL, *candend = NULL;
        const void *src;
+       /* these two needed for ADD_WITH_CHECK macro */
+       int abort_on_error = 1;
+       BUN nils = 0;
 
        CANDINIT(b, s);
 
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to