Changeset: 71ba9385dbfa for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=71ba9385dbfa Modified Files: monetdb5/modules/mal/array.mx Branch: SciQL-2 Log Message:
ARRAYtiles*(): cleaned-up & extended sanity checks; expensive checks that require data scans are only done when assertions or property checking are enabled. Our current implementation of tiled array aggregations is restricted to: - all dimensions must be of the same type - only dimension types SMALLINT (bte), TINYINT (sht), INT(EGER) (int) are supported - dimensions must be ascending, i.e., start <= stop && step > 0 - only step-size 1 is supported - arrays must be stored "canonically", i.e., sorted (ascending) on first dimension, and each subsequent dimension sub-sorted (ascending) within each value of its preceding dimension diffs (214 lines): diff --git a/monetdb5/modules/mal/array.mx b/monetdb5/modules/mal/array.mx --- a/monetdb5/modules/mal/array.mx +++ b/monetdb5/modules/mal/array.mx @@ -558,6 +558,15 @@ ARRAYfiller(Client cntxt, MalBlkPtr mb, * dim_1:BAT,tile_member_offset_dim_1:BAT,size_dim_1, ..., * dim_n:BAT,tile_member_offset_dim_n:BAT,size_dim_n) */ +/* + * CAVEATs: + * - all dimensions must be of the same type + * - only dimension types SMALLINT (bte), TINYINY (sht), INT(EGER) (int) are supported + * - dimensions must be ascending, i.e., start <= stop && step > 0 + * - only step-size 1 is supported + * - array must be stored "canonically", i.e., sorted (ascending) on first dimension, + * and each subsequent dimension sub-sorted (ascending) within each value of its preceding dimension + */ str ARRAYtiles_@4_@1_@8(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { @@ -577,6 +586,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk if ( (pci->argc - pci->retc - 1) % 3 != 0) throw(MAL, "array.@4", "Unbalanced argument sets"); + /* allocate local data structures */ ndims = (pci->argc - pci->retc - 1) / 3; bDims = (BAT**) GDKzalloc(sizeof(BAT*) * ndims); bDimsT = (@8**) GDKzalloc(sizeof(@8*) * ndims); @@ -589,6 +599,8 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk AGGR_CLEANUP(); throw(MAL, "array.@4", MAL_MALLOC_FAIL); } + + /* handle & chack arguments */ if 
(!(bVal = BATdescriptor(*(bat*)getArgReference(stk,pci,1)))) { AGGR_CLEANUP(); throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING); @@ -602,20 +614,21 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk throw(MAL, "array.@4", RUNTIME_OBJECT_MISSING); } dSize[i] = *(int*)getArgReference(stk,pci,2+i*3+2); - if (dSize[i] == 0) { + if (dSize[i] <= 0) { AGGR_CLEANUP(); - throw(MAL, "array.@4", "size dimension %d must not be 0", i); + throw(MAL, "array.@4", "size of dimension %d (%d) must not be <= 0", i, (int) dSize[i]); } arrsze *= dSize[i]; } - /* type check the shapes, prepare iterators, and compute the min/max of the dimensions */ + + /* check sanity of value BAT */ if (!BAThdense(bVal)) { AGGR_CLEANUP(); - throw(MAL, "array.@4", "head of value BAT must be dense"); + throw(MAL, "array.@4", "head of value BAT is not dense"); } if (bVal->ttype != TYPE_@1) { AGGR_CLEANUP(); - throw(MAL, "array.@4", "tail of value BAT must be of type @1"); + throw(MAL, "array.@4", "tail type of value BAT is not type @1"); } arrcnt = BATcount(bVal); if (arrcnt != arrsze) { @@ -623,6 +636,10 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk throw(MAL, "array.@4", "count of value BAT ("BUNFMT") != product of dimension sizes ("BUNFMT")", arrcnt, arrsze); } + /* access tail as array */ + bValT = (@1*) Tloc(bVal, BUNfirst(bVal)); + + /* check sanity of dimension & offset BATs properties */ arrbase = bVal->hseqbase; offbase = bOffsets[0]->hseqbase; offcnt = BATcount(bOffsets[0]); @@ -643,69 +660,81 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk AGGR_CLEANUP(); throw(MAL, "array.@4", "head of offset BAT %d is not aligned with head of offset BAT 0", i); } - if (bDims[i]->ttype != TYPE_@8 || bDims[i]->ttype != bOffsets[i]->ttype) { + if (bDims[i]->ttype != TYPE_@8) { AGGR_CLEANUP(); throw(MAL, "array.@4", "tail type of dimension BAT %d is not type @8", i); } - /* ! might require 2 full scans ! 
*/ - BATmin(bDims[i], &(dMin[i])); - BATmax(bDims[i], &(dMax[i])); + if (bOffsets[i]->ttype != TYPE_@8) { + AGGR_CLEANUP(); + throw(MAL, "array.@4", "tail type of offset BAT %d is not type @8", i); + } + } + + /* check sanity of dimension BATs content */ + for (i = 0; i < ndims; i++) { + /* access tails as arrays */ + bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i])); + bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i])); + + /* be optimistic */ + dMin[i] = bDimsT[i][0]; + dMax[i] = bDimsT[i][arrcnt-1]; + +#ifdef NDEBUG + PROPDEBUG +#endif + { + /* this might require several full scans; hence, + * only done when assertions or property checking + * are enabled */ + + BATmin(bDims[i], &(dMin[i])); + BATmax(bDims[i], &(dMax[i])); + + for (p = 0, r = arrsze; p < arrcnt; p += arrsze, r += arrsze) { + BAT *slice = BATslice(bDims[i],p,r); + + BATderiveHeadProps(BATmirror(slice),0); + if (!BATtordered(slice)) { + BBPunfix(slice->batCacheid); + AGGR_CLEANUP(); + if (i == 0) { + throw(MAL, "array.@4", "values of dimension %d are not sorted", i); + } else { + throw(MAL, "array.@4", "values of dimension %d are not sorted" + " within value "BUNFMT" of preceeding dimension", i, p / arrsze); + } + } + BBPunfix(slice->batCacheid); + + if (bDimsT[i][p] != dMin[i]) { + AGGR_CLEANUP(); + throw(MAL, "array.@4", "first value of slice "BUNFMT" of dimension %d (%d) is not the minimum (%d)", + p / arrsze, i, (int) bDimsT[i][p], (int) dMin[i]); + } + if (bDimsT[i][r-1] != dMax[i]) { + AGGR_CLEANUP(); + throw(MAL, "array.@4", "last value of slice "BUNFMT" of dimension %d (%d) is not the maximum (%d)", + p / arrsze, i, (int) bDimsT[i][r-1], (int) dMax[i]); + } + } + } + + if (dMin[i] > dMax[i]) { + AGGR_CLEANUP(); + throw(MAL, "array.@4", "minimum value of dimension BAT %d (%d) must not be larger than its maximum value (%d)", + i, (int) dMin[i], (int) dMax[i]); + } if ((int) (dMax[i] - dMin[i] + 1) != dSize[i]) { AGGR_CLEANUP(); throw(MAL, "array.@4", "range of dimension BAT %d 
(%d) does not match its size (%d)", i, (int) (dMax[i] - dMin[i] + 1), dSize[i]); } - /* might require (partial) scans; hence, only done when - * assertions or property checking enabled */ -#ifdef NDEBUG - PROPDEBUG -#endif - { - BAT *slice; - slice = BATslice(bDims[i],0,arrsze); - BATderiveHeadProps(BATmirror(slice),0); - if (!BATtordered(slice)) { - BBPunfix(slice->batCacheid); - AGGR_CLEANUP(); - throw(MAL, "array.@4", "values of dimension %d are not sorted %s", - i, i ? "within first value of preceeding dimension" : ""); - } - BBPunfix(slice->batCacheid); - if (arrsze < arrcnt) { - slice = BATslice(bDims[i],arrcnt-arrsze,arrcnt); - BATderiveHeadProps(BATmirror(slice),0); - if (!BATtordered(slice)) { - BBPunfix(slice->batCacheid); - AGGR_CLEANUP(); - throw(MAL, "array.@4", "values of dimension %d are not sorted %s", - i, i ? "within last value of preceeding dimension" : ""); - } - BBPunfix(slice->batCacheid); - } - } - arrsze /= dSize[i]; assert(arrsze); } - /* access tails as arrays */ - bValT = (@1*) Tloc(bVal, BUNfirst(bVal)); - for (i = 0; i < ndims; i++) { - bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i])); - bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i])); - if (bDimsT[i][0] != dMin[i]) { - AGGR_CLEANUP(); - throw(MAL, "array.@4", "first value of dimension %d (%d) is not its minimum (%d)", - i, (int) bDimsT[i][0], (int) dMin[i]); - } - if (bDimsT[i][arrcnt-1] != dMax[i]) { - AGGR_CLEANUP(); - throw(MAL, "array.@4", "last value of dimension %d (%d) is not its maximum (%d)", - i, (int) bDimsT[i][arrcnt-1], (int) dMax[i]); - } - } - /* For each anchor piont, compute all cells belong to this tile (bVal.head * is the group nr.) and compute the SUM */ bRes = BATnew(TYPE_void, TYPE_@3, BATcount(bVal)); @@ -752,6 +781,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk bResT[p] = (cnt ? 
@7 : @3_nil); nils |= !cnt; } + AGGR_CLEANUP(); BATsetcount(bRes, arrcnt); BATseqbase(bRes, arrbase); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list