Changeset: 71ba9385dbfa for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=71ba9385dbfa
Modified Files:
        monetdb5/modules/mal/array.mx
Branch: SciQL-2
Log Message:

ARRAYtiles*(): cleaned-up & extended sanity checks

expensive checks that require data scans are only done
when assertions or property checking are enabled

Our current implementation of tiled array aggregations is restricted to:

- all dimensions must be of the same type
- only dimension types TINYINT (bte), SMALLINT (sht), INT(EGER) (int) are supported
- dimensions must be ascending, i.e., start <= stop && step > 0
- only step-size 1 is supported
- arrays must be stored "canonically", i.e., sorted (ascending) on the first dimension,
  and each subsequent dimension sub-sorted (ascending) within each value of its preceding dimension


diffs (214 lines):

diff --git a/monetdb5/modules/mal/array.mx b/monetdb5/modules/mal/array.mx
--- a/monetdb5/modules/mal/array.mx
+++ b/monetdb5/modules/mal/array.mx
@@ -558,6 +558,15 @@ ARRAYfiller(Client cntxt, MalBlkPtr mb, 
  *   dim_1:BAT,tile_member_offset_dim_1:BAT,size_dim_1, ...,
  *   dim_n:BAT,tile_member_offset_dim_n:BAT,size_dim_n)
  */
+/*
+ * CAVEATs:
+ * - all dimensions must be of the same type
+ * - only dimension types TINYINT (bte), SMALLINT (sht), INT(EGER) (int) are supported
+ * - dimensions must be ascending, i.e., start <= stop && step > 0
+ * - only step-size 1 is supported
+ * - array must be stored "canonically", i.e., sorted (ascending) on first 
dimension,
+ *   and each subsequent dimension sub-sorted (ascending) within each value of 
its preceding dimension
+ */
 str
 ARRAYtiles_@4_@1_@8(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
 {
@@ -577,6 +586,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
        if ( (pci->argc - pci->retc - 1) % 3 != 0)
                throw(MAL, "array.@4", "Unbalanced argument sets");
 
+       /* allocate local data structures */
        ndims = (pci->argc - pci->retc - 1) / 3;
        bDims = (BAT**) GDKzalloc(sizeof(BAT*) * ndims);
        bDimsT = (@8**) GDKzalloc(sizeof(@8*) * ndims);
@@ -589,6 +599,8 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
                AGGR_CLEANUP();
                throw(MAL, "array.@4", MAL_MALLOC_FAIL);
        }
+
+       /* handle & chack arguments */
        if (!(bVal = BATdescriptor(*(bat*)getArgReference(stk,pci,1)))) {
                AGGR_CLEANUP();
                throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING);
@@ -602,20 +614,21 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
                        throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING);
                }
                dSize[i] = *(int*)getArgReference(stk,pci,2+i*3+2);
-               if (dSize[i] == 0) {
+               if (dSize[i] <= 0) {
                        AGGR_CLEANUP();
-                       throw(MAL, "array.@4", "size dimension %d must not be 
0", i);
+                       throw(MAL, "array.@4", "size of dimension %d (%d) must 
not be <= 0", i, (int) dSize[i]);
                }
                arrsze *= dSize[i];
        }
-       /* type check the shapes, prepare iterators, and compute the min/max of 
the dimensions */
+
+       /* check sanity of value BAT */
        if (!BAThdense(bVal)) {
                AGGR_CLEANUP();
-               throw(MAL, "array.@4", "head of value BAT must be dense");
+               throw(MAL, "array.@4", "head of value BAT is not dense");
        }
        if (bVal->ttype != TYPE_@1) {
                AGGR_CLEANUP();
-               throw(MAL, "array.@4", "tail of value BAT must be of type @1");
+               throw(MAL, "array.@4", "tail type of value BAT is not type @1");
        }
        arrcnt = BATcount(bVal);
        if (arrcnt != arrsze) {
@@ -623,6 +636,10 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
                throw(MAL, "array.@4", "count of value BAT ("BUNFMT") != 
product of dimension sizes ("BUNFMT")",
                        arrcnt, arrsze);
        }
+       /* access tail as array */
+       bValT = (@1*) Tloc(bVal, BUNfirst(bVal));
+
+       /* check sanity of dimension & offset BATs properties */
        arrbase = bVal->hseqbase;
        offbase = bOffsets[0]->hseqbase;
        offcnt = BATcount(bOffsets[0]);
@@ -643,69 +660,81 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
                        AGGR_CLEANUP();
                        throw(MAL, "array.@4", "head of offset BAT %d is not 
aligned with head of offset BAT 0", i);
                }
-               if (bDims[i]->ttype != TYPE_@8 || bDims[i]->ttype != 
bOffsets[i]->ttype) {
+               if (bDims[i]->ttype != TYPE_@8) {
                        AGGR_CLEANUP();
                        throw(MAL, "array.@4", "tail type of dimension BAT %d 
is not type @8", i);
                }
-               /* ! might require 2 full scans ! */
-               BATmin(bDims[i], &(dMin[i]));
-               BATmax(bDims[i], &(dMax[i]));
+               if (bOffsets[i]->ttype != TYPE_@8) {
+                       AGGR_CLEANUP();
+                       throw(MAL, "array.@4", "tail type of offset BAT %d is 
not type @8", i);
+               }
+       }
+
+       /* check sanity of dimension BATs content */
+       for (i = 0; i < ndims; i++) {
+               /* access tails as arrays */
+               bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i]));
+               bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i]));
+
+               /* be optimistic */
+               dMin[i] = bDimsT[i][0];
+               dMax[i] = bDimsT[i][arrcnt-1];
+
+#ifdef NDEBUG
+               PROPDEBUG
+#endif
+               {
+                       /* this might require several full scans; hence,
+                        * only done when assertions or property checking
+                        * are enabled */
+
+                       BATmin(bDims[i], &(dMin[i]));
+                       BATmax(bDims[i], &(dMax[i]));
+
+                       for (p = 0, r = arrsze; p < arrcnt; p += arrsze, r += 
arrsze) {
+                               BAT *slice = BATslice(bDims[i],p,r);
+
+                               BATderiveHeadProps(BATmirror(slice),0);
+                               if (!BATtordered(slice)) {
+                                       BBPunfix(slice->batCacheid);
+                                       AGGR_CLEANUP();
+                                       if (i == 0) {
+                                               throw(MAL, "array.@4", "values 
of dimension %d are not sorted", i);
+                                       } else {
+                                               throw(MAL, "array.@4", "values 
of dimension %d are not sorted"
+                                                       " within value "BUNFMT" 
of preceeding dimension", i, p / arrsze);
+                                       }
+                               }
+                               BBPunfix(slice->batCacheid);
+
+                               if (bDimsT[i][p] != dMin[i]) {
+                                       AGGR_CLEANUP();
+                                       throw(MAL, "array.@4", "first value of 
slice "BUNFMT" of dimension %d (%d) is not the minimum (%d)",
+                                               p / arrsze, i, (int) 
bDimsT[i][p], (int) dMin[i]);
+                               }
+                               if (bDimsT[i][r-1] != dMax[i]) {
+                                       AGGR_CLEANUP();
+                                       throw(MAL, "array.@4", "last value of 
slice "BUNFMT" of dimension %d (%d) is not the maximum (%d)",
+                                               p / arrsze, i, (int) 
bDimsT[i][r-1], (int) dMax[i]);
+                               }
+                       }
+               }
+
+               if (dMin[i] > dMax[i]) {
+                       AGGR_CLEANUP();
+                       throw(MAL, "array.@4", "minimum value of dimension BAT 
%d (%d) must not be larger than its maximum value (%d)",
+                               i, (int) dMin[i], (int) dMax[i]);
+               }
                if ((int) (dMax[i] - dMin[i] + 1) != dSize[i]) {
                        AGGR_CLEANUP();
                        throw(MAL, "array.@4", "range of dimension BAT %d (%d) 
does not match its size (%d)",
                                i, (int) (dMax[i] - dMin[i] + 1), dSize[i]);
                }
 
-               /* might require (partial) scans; hence, only done when
-                * assertions or property checking enabled */
-#ifdef NDEBUG
-               PROPDEBUG
-#endif
-               {
-                       BAT *slice;
-                       slice = BATslice(bDims[i],0,arrsze);
-                       BATderiveHeadProps(BATmirror(slice),0);
-                       if (!BATtordered(slice)) {
-                               BBPunfix(slice->batCacheid);
-                               AGGR_CLEANUP();
-                               throw(MAL, "array.@4", "values of dimension %d 
are not sorted %s",
-                                       i, i ? "within first value of 
preceeding dimension" : "");
-                       }
-                       BBPunfix(slice->batCacheid);
-                       if (arrsze < arrcnt) {
-                               slice = BATslice(bDims[i],arrcnt-arrsze,arrcnt);
-                               BATderiveHeadProps(BATmirror(slice),0);
-                               if (!BATtordered(slice)) {
-                                       BBPunfix(slice->batCacheid);
-                                       AGGR_CLEANUP();
-                                       throw(MAL, "array.@4", "values of 
dimension %d are not sorted %s",
-                                               i, i ? "within last value of 
preceeding dimension" : "");
-                               }
-                               BBPunfix(slice->batCacheid);
-                       }
-               }
-
                arrsze /= dSize[i];
                assert(arrsze);
        }
 
-       /* access tails as arrays */
-       bValT = (@1*) Tloc(bVal, BUNfirst(bVal));
-       for (i = 0; i < ndims; i++) {
-               bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i]));
-               bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i]));
-               if (bDimsT[i][0] != dMin[i]) {
-                       AGGR_CLEANUP();
-                       throw(MAL, "array.@4", "first value of dimension %d 
(%d) is not its minimum (%d)",
-                               i, (int) bDimsT[i][0], (int) dMin[i]);
-               }
-               if (bDimsT[i][arrcnt-1] != dMax[i]) {
-                       AGGR_CLEANUP();
-                       throw(MAL, "array.@4", "last value of dimension %d (%d) 
is not its maximum (%d)",
-                               i, (int) bDimsT[i][arrcnt-1], (int) dMax[i]);
-               }
-       }
-
        /* For each anchor piont, compute all cells belong to this tile 
(bVal.head
         * is the group nr.) and compute the SUM */
        bRes =  BATnew(TYPE_void, TYPE_@3, BATcount(bVal));
@@ -752,6 +781,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk
                bResT[p] = (cnt ? @7 : @3_nil);
                nils |= !cnt;
        }
+
        AGGR_CLEANUP();
        BATsetcount(bRes, arrcnt);
        BATseqbase(bRes, arrbase);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to