Changeset: 5e0839367a63 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=5e0839367a63 Modified Files: monetdb5/modules/mal/array.mx sql/backends/monet5/sciql.c sql/server/rel_schema.c Branch: SciQL-2 Log Message:
some comments, printouts and renames to help me understand diffs (truncated from 390 to 300 lines): diff --git a/monetdb5/modules/mal/array.mx b/monetdb5/modules/mal/array.mx --- a/monetdb5/modules/mal/array.mx +++ b/monetdb5/modules/mal/array.mx @@ -607,7 +607,7 @@ ARRAYfiller(Client cntxt, MalBlkPtr mb, * - only dimension types SMALLINT (bte), TINYINY (sht), INT(EGER) (int) are supported * - dimensions must be ascending, i.e., start <= stop && step > 0 * - only step-size 1 is supported - * - array must be stored "canonically", i.e., sorted (ascending) on first dimension, + * - array must be stored ", p, (double)bRest[p]);canonically", i.e., sorted (ascending) on first dimension, * and each subsequent dimension sub-sorted (ascending) within each value of its preceding dimension */ str @@ -665,6 +665,8 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk arrsze *= dSize[i]; } +//fprintf(stderr, "arrsze = %u\n", (unsigned int)arrsze); + /* check sanity of value BAT */ if (!BAThdense(bVal)) { AGGR_CLEANUP(); @@ -675,6 +677,8 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk throw(MAL, "array.@4", "tail type of value BAT is not type @1"); } arrcnt = BATcount(bVal); +fprintf(stderr, "arrcnt = %u\n", (unsigned int)arrcnt); + if (arrcnt != arrsze) { AGGR_CLEANUP(); throw(MAL, "array.@4", "count of value BAT ("BUNFMT") != product of dimension sizes ("BUNFMT")", @@ -726,8 +730,9 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk /* access tails as arrays */ bDimsT[i] = (@8*) Tloc(bDims[i], BUNfirst(bDims[i])); bOffsetsT[i] = (@8*) Tloc(bOffsets[i], BUNfirst(bOffsets[i])); - - /* be optimistic */ +for(p=0; p<BATcount(bOffsets[i]); p++) + fprintf(stderr, "%u: %d - %d\n", (unsigned int)i, (int)bDimsT[i][p], (int)bOffsetsT[i][p]); + /* be optimistic, i.e. assume that BATs are ordered and subordered (e.g. y subordered on x etc.) */ dMin[i] = bDimsT[i][0]; dMax[i] = bDimsT[i][arrcnt-1]; @@ -802,6 +807,7 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk offset[r] += mul * bOffsetsT[i][r]; mul *= dSize[i]; } +fprintf(stderr, "offset: %u => %f\n", (unsigned int)r, (double)offset[r]); } /* For each anchor piont, compute all cells belong to this tile (bVal.head @@ -838,14 +844,15 @@ ARRAYtiles_@4_@1_@8(Client cntxt, MalBlk } if (!skip) { @1 elm = bValT[vid]; - if (elm != @1_nil) { - @6; + @6; //@6 := sum += elm (if sum) cnt++; } +fprintf(stderr, "elm: (p=%u, r=%u) => %d -> %f\n", (unsigned int)p, (unsigned int)r, (int)elm, (double)@7); } } - bResT[p] = (cnt ? @7 : @3_nil); + bResT[p] = (cnt ? @7 : @3_nil); //@7 := sum (if sum) +fprintf(stderr, "\t%u: %f\n", (unsigned int)p, (double)bResT[p]); nils |= !cnt; } diff --git a/sql/backends/monet5/sciql.c b/sql/backends/monet5/sciql.c --- a/sql/backends/monet5/sciql.c +++ b/sql/backends/monet5/sciql.c @@ -23,8 +23,8 @@ #define MTRL_CLEANUP() \ { \ - if (N) GDKfree(N); \ - if (M) GDKfree(M); \ + if (idxRepetitionsPerGroup) GDKfree(idxRepetitionsPerGroup); \ + if (groups) GDKfree(groups); \ if (bids) { \ for (i = 0; i < list_length(a->columns.set); i++) { \ if (bids[i] != 0) \ @@ -58,16 +58,18 @@ static str return createException(type, fcn, "%s", msg); } -str -SCIQLmaterialise(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) -{ +/* It is called in each update query involving an array. If the array is materialised nothing happens. + * Otherwise, it creates all necessary BATs initialising them with the appropriate values (the + * non-dimensional BAT is initialised with the default value). The query is handled then as any other + * query */ +str SCIQLmaterialise(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci) { mvc *sql = NULL; str msg = getSQLContext(cntxt, mb, &sql, NULL); - str sname = *(str*) getArgReference(stk, pci, 1); - str aname = *(str*) getArgReference(stk, pci, 2); + str schema_name = *(str*) getArgReference(stk, pci, 1); + str array_name = *(str*) getArgReference(stk, pci, 2); sql_schema *s = NULL; sql_table *a = NULL; - int i = 0, j = 0, *N = NULL, *M = NULL, *bids = NULL; + int i = 0, j = 0, *idxRepetitionsPerGroup = NULL, *groups = NULL, *bids = NULL; BUN cntall = 1; /* cntall must be initialised with 1! */ node *n = NULL; @@ -82,50 +84,51 @@ SCIQLmaterialise(Client cntxt, MalBlkPtr if (msg) return msg; - if (!(s = mvc_bind_schema(sql, sname))) + if (!(s = mvc_bind_schema(sql, schema_name))) throw(MAL, "sciql.materialise", RUNTIME_OBJECT_MISSING); - if (!(a = mvc_bind_table(sql, s, aname))) + if (!(a = mvc_bind_table(sql, s, array_name))) throw(MAL, "sciql.materialise", RUNTIME_OBJECT_MISSING); - if (a->materialised) + if (a->materialised) //If materialised then it is treated than any other BAT (should this change? Does it make sense to have partially initialised BATs?) return MAL_SUCCEED; - /* To compute N (the #times each value is repeated), multiply the size of - * dimensions defined after the current dimension. For the last dimension, - * its N is 1. To compute M (the #times each value group is repeated), - * multiply the size of dimensions defined before the current dimension. - * For the first dimension, its M is 1. */ - N = GDKmalloc(a->valence * sizeof(int)); /* #repeats of each value */ - M = GDKmalloc(a->valence * sizeof(int)); /* #repeats of each group of values */ + /* When having a single dimension, e.g. x then each value corresponds to a different x_idx, x_idx=0, x_idx=1, etc. + * When having more dimensions, e.g. x and y then each value corresponds to a different pair of idxs, + * e.g. (x_idx=0, y_idx=0), (x_idx=0, y_idx=1), (x_idx=0, y_idx=2) etc. From the example is obvious that until all + * possible values of y_idx has been used the x_idx will be 0, when all y_idx values has been used x_idx will increase + * and the y_idx values will start from the beggining. So, the idxRepettitionsPerGroup for y is 1 (the y_idx is changed + * in each repetition) while the groups groups for y equals the number of x_idx values (all y_idx values are repeated for + * each value of x_idx) */ + idxRepetitionsPerGroup = GDKmalloc(a->valence * sizeof(int)); /* #repeats of each value */ + groups = GDKmalloc(a->valence * sizeof(int)); /* #repeats of each group of values */ bids = GDKzalloc(list_length(a->columns.set) * sizeof(int)); /* BAT ids for each column */ - if (!N || !M || !bids) { + if (!idxRepetitionsPerGroup || !groups || !bids) { MTRL_CLEANUP(); throw(MAL, "sciql.materialise", MAL_MALLOC_FAIL); } for (i = 0; i < a->valence; i++) - N[i] = M[i] = 1; + idxRepetitionsPerGroup[i] = groups[i] = 1; for (n = a->columns.set->h, i = 0; n; n = n->next) { sql_column *sc = n->data; if (sc->dim) { /* TODO: overflow check, see gdk_calc.c */ - lng cnt_l = sc->dim->step > 0 ? (sc->dim->stop - sc->dim->strt + sc->dim->step - 1) / sc->dim->step : - (sc->dim->strt - sc->dim->stop - sc->dim->step - 1) / -sc->dim->step; - int cnt_i = (int) cnt_l; + lng dimensionElementsNum_lng = 1+(sc->dim->step > 0 ? (sc->dim->stop-1 - sc->dim->strt) / sc->dim->step : (sc->dim->strt - 1 - sc->dim->stop) / -sc->dim->step); + int dimensionElementsNum_int = (int) dimensionElementsNum_lng; #ifndef NDEBUG - int lim = GDK_int_max / cnt_i; + int lim = GDK_int_max / dimensionElementsNum_int; #endif - assert(cnt_l <= (lng) GDK_int_max); + assert(dimensionElementsNum_lng <= (lng) GDK_int_max); for (j = 0; j < i; j++) { - assert(N[j] <= lim); - N[j] *= cnt_i; + assert(idxRepetitionsPerGroup[j] <= lim); + idxRepetitionsPerGroup[j] *= dimensionElementsNum_int; } for (j = a->valence - 1; j > i; j--) { - assert(M[j] <= lim); - M[j] *= cnt_i; + assert(groups[j] <= lim); + groups[j] *= dimensionElementsNum_int; } - assert((BUN) cnt_i <= BUN_MAX / cntall); - cntall *= cnt_i; + assert((BUN) dimensionElementsNum_int <= BUN_MAX / cntall); + cntall *= dimensionElementsNum_int; i++; } } @@ -134,27 +137,27 @@ SCIQLmaterialise(Client cntxt, MalBlkPtr for (n = a->columns.set->h, i = 0; n; n = n->next, i++){ sql_column *sc = n->data; BAT *bn = NULL; - int tpe = sc->type.type->localtype; + int tpe = sc->type.type->localtype; //the type of the column if (sc->dim) { switch (tpe) { case TYPE_bte: - ARRAYseries_bte(&bids[i], (bte *)&sc->dim->strt, (bte *)&sc->dim->step, (bte *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_bte(&bids[i], (bte *)&sc->dim->strt, (bte *)&sc->dim->step, (bte *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; case TYPE_sht: - ARRAYseries_sht(&bids[i], (sht *)&sc->dim->strt, (sht *)&sc->dim->step, (sht *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_sht(&bids[i], (sht *)&sc->dim->strt, (sht *)&sc->dim->step, (sht *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; case TYPE_int: - ARRAYseries_int(&bids[i], (int *)&sc->dim->strt, (int *)&sc->dim->step, (int *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_int(&bids[i], (int *)&sc->dim->strt, (int *)&sc->dim->step, (int *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; case TYPE_lng: - ARRAYseries_lng(&bids[i], (lng *)&sc->dim->strt, (lng *)&sc->dim->step, (lng *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_lng(&bids[i], (lng *)&sc->dim->strt, (lng *)&sc->dim->step, (lng *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; case TYPE_flt: - ARRAYseries_flt(&bids[i], (flt *)&sc->dim->strt, (flt *)&sc->dim->step, (flt *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_flt(&bids[i], (flt *)&sc->dim->strt, (flt *)&sc->dim->step, (flt *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; case TYPE_dbl: - ARRAYseries_dbl(&bids[i], (dbl *)&sc->dim->strt, (dbl *)&sc->dim->step, (dbl *)&sc->dim->stop, &N[i], &M[i]); + ARRAYseries_dbl(&bids[i], (dbl *)&sc->dim->strt, (dbl *)&sc->dim->step, (dbl *)&sc->dim->stop, &idxRepetitionsPerGroup[i], &groups[i]); break; default: MTRL_CLEANUP(); @@ -165,12 +168,13 @@ SCIQLmaterialise(Client cntxt, MalBlkPtr MTRL_CLEANUP(); throw(MAL, "sciql.materialise", "Cannot access descriptor"); } - } else { + } else { //non-dimensional column ValRecord src, dst; int ret = 0; src.vtype = TYPE_str; - if (sc->def) { + if (sc->def) { //the column has a default value + //no mater what the default value is all values set will have the same value size_t l = strlen(sc->def); if (l == 4 && (sc->def[0] == 'n' || sc->def[0] == 'N') && (sc->def[1] == 'u' || sc->def[1] == 'U') && @@ -197,6 +201,7 @@ SCIQLmaterialise(Client cntxt, MalBlkPtr return _rethrow(MAL, "sciql.materialise", "string conversion failed"); } + /* Fill the BAT with the constant value */ switch(tpe) { /* TODO: check for overflow */ case TYPE_bte: bn = BATconstant(tpe, &dst.val.btval, cntall); @@ -239,11 +244,11 @@ SCIQLmaterialise(Client cntxt, MalBlkPtr systable = find_sql_table(syss, "_tables"); sysarray = find_sql_table(syss, "_arrays"); /* find 'id' of this array in _tables */ - rid = table_funcs.column_find_row(tr, find_sql_column(systable, "name"), aname, NULL); - tid = *(sqlid*) table_funcs.column_find_value(tr, find_sql_column(systable, "id"), rid); + rid = table_funcs.column_find_row(tr, find_sql_column(systable, "name"), array_name, NULL); //find the row in _tables that is about the processed array + tid = *(sqlid*) table_funcs.column_find_value(tr, find_sql_column(systable, "id"), rid); //find the column in the above row that is about id /* update value in _arrays */ - rid = table_funcs.column_find_row(tr, find_sql_column(sysarray, "table_id"), &tid, NULL); - table_funcs.column_update_value(tr, find_sql_column(sysarray, "materialised"), rid, &materialised); + rid = table_funcs.column_find_row(tr, find_sql_column(sysarray, "table_id"), &tid, NULL); //find the row in arrays that is about the processed array + table_funcs.column_update_value(tr, find_sql_column(sysarray, "materialised"), rid, &materialised); //set the value in the column about materialised a->materialised = 1; return MAL_SUCCEED; diff --git a/sql/server/rel_schema.c b/sql/server/rel_schema.c --- a/sql/server/rel_schema.c +++ b/sql/server/rel_schema.c @@ -891,74 +891,76 @@ rel_create_table(mvc *sql, sql_schema *s mvc_create_table(sql, s, name, tt, 0, SQL_DECLARED_TABLE, commit_action, -1); dnode *n; dlist *columns = table_elements_or_subquery->data.lval; - list *drngs = sa_list(sql->sa); + list *dimensionsRanges = sa_list(sql->sa); - for (n = columns->h; n; n = n->next) { - symbol *sym = n->data.sym; + for (n = columns->h; n; n = n->next) { //for each column + symbol *sym = n->data.sym; //each column is represented with a symbol int res = table_element(sql, sym, s, t, 0); if (res == SQL_ERR) return NULL; /* A dimension column? Add the range expressions to rel_table */ - if (sym->token == SQL_COLUMN && sym->data.lval->cnt == 4) { - dnode *dn = sym->data.lval->h->next->next->next; - sql_column *dc = t->columns.set->t->data; - char *sqltpe = dc->type.type->sqlname; + if (sym->token == SQL_COLUMN && sym->data.lval->cnt == 4 && sym->data.lval->h->next->next->next->data.sym->token == SQL_DIMENSION) { + dnode *dimensionNode = sym->data.lval->h->next->next->next; + sql_column *dimensionColumn = t->columns.set->t->data; + char *sqltpe = dimensionColumn->type.type->sqlname; int is_int_dc = isAnSQLIntType(sqltpe, strlen(sqltpe)); - dlist *drng = NULL; + dlist *dimensionRange = dimensionNode->data.sym->data.lval; sql_exp *val_exp = NULL; exp_kind ek = {type_value, card_value, TRUE}; sql_subtype *lngtpe = sql_bind_localtype("lng"); if (!isArray(t)) - return sql_error(sql, 02, "%s %s: dimensions ('%s') not allowed in non-ARRAY\n", action, tt2string(tt), dc->base.name); + return sql_error(sql, 02, "%s %s: dimensions ('%s') not allowed in non-ARRAY\n", action, tt2string(tt), dimensionColumn->base.name); if (!isSupportedType(sqltpe)) return sql_error(sql, 02, "%s ARRAY: dimension type '%s' not supported yet\n", action, sqltpe); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list