Changeset: d4edbc92fb75 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d4edbc92fb75
Modified Files:
	sql/backends/monet5/rel_bin.c
	sql/backends/monet5/sql_statement.c
	sql/backends/monet5/sql_statement.h
	sql/common/sql_types.c
	sql/server/rel_updates.c
Branch: copyintobinary
Log Message:

Generate COPY BINARY code in rel_bin instead of in optimizer

diffs (287 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1109,6 +1109,140 @@ exp2bin_coalesce(backend *be, sql_exp *f
 	return res;
 }

+static stmt *
+emit_loadcolumn(backend *be, stmt *importTable_args[], int *count_var, node *file_node, node *type_node)
+{
+	MalBlkPtr mb = be->mb;
+
+	sql_exp *file_exp = file_node->data;
+	stmt *file_stmt = exp_bin(be, file_exp, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0);
+	sql_subtype *subtype = type_node->data;
+	int data_type = subtype->type->localtype;
+	int bat_type = newBatType(data_type);
+
+	// The sql.importColumn operator takes a 'method' string to determine how to
+	// load the data. This leaves the door open to have multiple loaders for the
+	// same backend type, for example nul- and newline terminated strings.
+	// For the time being we just use the name of the storage type as the method
+	// name.
+	const char *method = ATOMname(data_type);
+
+	// arg("sname",str),arg("tname",str),arg("onclient",int),arg("bswap",bit)
+	stmt *onclient_arg = importTable_args[2];
+	stmt *bswap_arg = importTable_args[3];
+
+
+	int new_count_var = newTmpVariable(mb, TYPE_oid);
+
+	InstrPtr p = newStmt(mb, sqlRef, importColumnRef);
+	setArgType(mb, p, 0, bat_type);
+	p = pushReturn(mb, p, new_count_var);
+	//
+	p = pushStr(mb, p, method);
+	p = pushArgument(mb, p, bswap_arg->nr);
+	p = pushArgument(mb, p, file_stmt->nr);
+	p = pushArgument(mb, p, onclient_arg->nr);
+	if (*count_var < 0)
+		p = pushOid(mb, p, 0);
+	else
+		p = pushArgument(mb, p, *count_var);
+
+	*count_var = new_count_var;
+
+	stmt *s = stmt_magic(be, subtype, p);
+	return s;
+}
+
+static int
+node_type_score(node *n)
+{
+	sql_subtype *st = n->data;
+	int tpe = st->type->localtype;
+	int stpe = ATOMstorage(tpe);
+	int score = stpe + (stpe == TYPE_bit);
+	return score;
+}
+
+static stmt*
+exp2bin_copyfrombinary(backend *be, sql_exp *fe, stmt *left, stmt *right, stmt *isel, int depth)
+{
+	(void)depth;
+	mvc *sql = be->mvc;
+	assert(left == NULL); (void)left;
+	assert(right == NULL); (void)right;
+	assert(isel == NULL); (void)isel;
+	(void)be;
+	(void)fe;
+	sql_subfunc *f = fe->f;
+
+	list *arg_list = fe->l;
+	list *type_list = f->res;
+
+	// There are four arguments preceding the list of files.
+	// Translate them and remember the variable number of the result.
+	assert(4 + list_length(type_list) == list_length(arg_list));
+	node *argnode = arg_list->h;
+	stmt *arg_stmts[4] = { 0 };
+	for (int i = 0; i < 4; i++) {
+		sql_exp *arg_exp = argnode->data;
+		arg_stmts[i] = exp_bin(be, arg_exp, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0);
+		argnode = argnode->next;
+	}
+
+	// If it's on server we can optimize a little
+	bool onserver = false;
+	node *onclient_arg = arg_list->h->next->next;
+	sql_exp *onclient_exp = onclient_arg->data;
+	if (onclient_exp->type == e_atom) {
+		atom *onclient_atom = onclient_exp->l;
+		int onclient = onclient_atom->data.val.ival;
+		onserver = (onclient == 0);
+	}
+
+	node *const first_file = argnode;
+	node *const first_type = type_list->h;
+	node *file, *type;
+
+	// The first column we load determines the number of rows.
+	// We pass it on to the other columns.
+	// The first column to load should therefore be an 'easy' one.
+	// We identify columns by their type node.
+	node *prototype_file = first_file;
+	node *prototype_type = first_type;
+	int score = node_type_score(prototype_type);
+	for (file = first_file->next, type = first_type->next; file && type; file = file->next, type = type->next) {
+		int sc = node_type_score(type);
+		if (sc < score) {
+			prototype_file = file;
+			prototype_type = type;
+			score = sc;
+		}
+	}
+
+	// Emit the columns
+	int count_var = -1;
+	list *columns = sa_list(sql->sa);
+	stmt *prototype_stmt = emit_loadcolumn(be, arg_stmts, &count_var, prototype_file, prototype_type);
+	if (!prototype_stmt)
+		return NULL;
+	int orig_count_var = count_var;
+	for (file = first_file, type = first_type; file && type; file = file->next, type = type->next) {
+		stmt *s;
+		if (type == prototype_type) {
+			s = prototype_stmt;
+		} else {
+			s = emit_loadcolumn(be, arg_stmts, &count_var, file, type);
+			if (!s)
+				return NULL;
+		}
+		list_append(columns, s);
+		if (onserver)
+			count_var = orig_count_var;
+	}
+
+	return stmt_list(be, columns);
+}
+
 stmt *
 exp_bin(backend *be, sql_exp *e, stmt *left, stmt *right, stmt *grp, stmt *ext, stmt *cnt, stmt *sel, int depth, int reduce, int push)
 {
@@ -1262,16 +1396,19 @@ exp_bin(backend *be, sql_exp *e, stmt *l
 		}
 		assert(!e->r);
 		if (strcmp(mod, "") == 0 && strcmp(fimp, "") == 0) {
-			if (strcmp(f->func->base.name, "star") == 0) {
+			const char *fname = f->func->base.name;
+			if (strcmp(fname, "star") == 0) {
 				if (!left)
 					return const_column(be, stmt_bool(be, 1));
 				return left->op4.lval->h->data;
-			} if (strcmp(f->func->base.name, "case") == 0)
+			} if (strcmp(fname, "case") == 0)
 				return exp2bin_case(be, e, left, right, sel, depth);
-			if (strcmp(f->func->base.name, "casewhen") == 0)
+			if (strcmp(fname, "casewhen") == 0)
 				return exp2bin_casewhen(be, e, left, right, sel, depth);
-			if (strcmp(f->func->base.name, "coalesce") == 0)
+			if (strcmp(fname, "coalesce") == 0)
 				return exp2bin_coalesce(be, e, left, right, sel, depth);
+			if (strcmp(fname, "copyfrombinary") == 0)
+				return exp2bin_copyfrombinary(be, e, left, right, sel, depth);
 		}
 		if (!list_empty(exps)) {
 			unsigned nrcols = 0;
diff --git a/sql/backends/monet5/sql_statement.c b/sql/backends/monet5/sql_statement.c
--- a/sql/backends/monet5/sql_statement.c
+++ b/sql/backends/monet5/sql_statement.c
@@ -491,6 +491,21 @@ stmt_temp(backend *be, sql_subtype *t)
 }

 stmt *
+stmt_magic(backend *be, sql_subtype *t, InstrPtr q)
+{
+	if (q == NULL)
+		return NULL;
+	stmt *s = stmt_create(be->mvc->sa, st_result);
+	s->op4.typeval = *t;
+	s->nrcols = 1;
+	s->q = q;
+	s->nr = getDestVar(q);
+	s->flag = 0;
+	return s;
+}
+
+
+stmt *
 stmt_tid(backend *be, sql_table *t, int partition)
 {
 	int tt = TYPE_oid;
@@ -1038,6 +1053,7 @@ stmt_result(backend *be, stmt *s, int nr
 		ns->nr = s->nr;
 	}
 	ns->op1 = s;
+	ns->op4.typeval = *sql_bind_localtype("oid");
 	ns->flag = nr;
 	ns->nrcols = s->nrcols;
 	ns->key = s->key;
@@ -2465,6 +2481,10 @@ stmt_rs_column(backend *be, stmt *rs, in
 		s->q = q;
 		s->nr = getArg(q, s->flag);
 		return s;
+	} else if (rs->type == st_list) {
+		list *cols = rs->op4.lval;
+		if (i < list_length(cols))
+			return list_fetch(cols, i);
 	}
 	return NULL;
 }
@@ -3855,10 +3875,11 @@ tail_type(stmt *st)
 		/* fall through */
 	case st_reorder:
 	case st_group:
-	case st_result:
 	case st_tid:
 	case st_mirror:
 		return sql_bind_localtype("oid");
+	case st_result:
+		return &st->op4.typeval;
 	case st_table_clear:
 		return sql_bind_localtype("lng");
 	case st_aggr:
diff --git a/sql/backends/monet5/sql_statement.h b/sql/backends/monet5/sql_statement.h
--- a/sql/backends/monet5/sql_statement.h
+++ b/sql/backends/monet5/sql_statement.h
@@ -247,6 +247,8 @@ extern stmt *stmt_func(backend *be, stmt
 extern stmt *stmt_direct_func(backend *be, InstrPtr q);
 extern stmt *stmt_aggr(backend *be, stmt *op1, stmt *grp, stmt *ext, sql_subfunc *op, int reduce, int no_nil, int nil_if_empty);

+extern stmt *stmt_magic(backend *be, sql_subtype *t, InstrPtr q);
+
 extern stmt *stmt_alias(backend *be, stmt *op1, const char *tname, const char *name);

 extern int stmt_output(backend *be, stmt *l);
diff --git a/sql/common/sql_types.c b/sql/common/sql_types.c
--- a/sql/common/sql_types.c
+++ b/sql/common/sql_types.c
@@ -1473,7 +1473,7 @@ sqltypeinit( sql_allocator *sa)
 	f->varres = 1;

 	/* bincopyfrom */
-	f = sql_create_union(sa, "copyfrom", "sql", "importTable", TRUE, SCALE_FIX, 0, TABLE, 3, STR, STR, INT);
+	f = sql_create_union(sa, "copyfrombinary", "", "", TRUE, SCALE_FIX, 0, TABLE, 3, STR, STR, INT);
 	f->varres = 1;

 	/* sys_update_schemas, sys_update_tables */
diff --git a/sql/server/rel_updates.c b/sql/server/rel_updates.c
--- a/sql/server/rel_updates.c
+++ b/sql/server/rel_updates.c
@@ -1744,8 +1744,9 @@ bincopyfrom(sql_query *query, dlist *qna
 	list *exps, *args;
 	sql_subtype strtpe;
 	sql_exp *import;
-	sql_subfunc *f = sql_find_func(sql, "sys", "copyfrom", 3, F_UNION, true, NULL);
+	sql_subfunc *f = sql_find_func(sql, "sys", "copyfrombinary", 3, F_UNION, true, NULL);
 	list *collist;
+	list *typelist;

 	assert(f);
 	if (!copy_allowed(sql, 1))
@@ -1758,13 +1759,18 @@ bincopyfrom(sql_query *query, dlist *qna
 	if (files == NULL)
 		return sql_error(sql, 02, SQLSTATE(42000) "COPY INTO: must specify files");

-	collist = check_table_columns(sql, t, columns, "COPY BINARY INTO", tname);
-	if (!collist)
-		return NULL;
-
 	bool do_byteswap = (endian != endian_native && endian != OUR_ENDIANNESS);

-	f->res = table_column_types(sql->sa, t);
+	typelist = sa_list(sql->sa);
+	collist = check_table_columns(sql, t, columns, "COPY BINARY INTO", tname);
+	if (!collist || !typelist)
+		return NULL;
+	for (node *n = collist->h; n; n = n->next) {
+		sql_column *c = n->data;
+		sa_list_append(sql->sa, typelist, &c->type);
+	}
+	f->res = typelist;
+
 	sql_find_subtype(&strtpe, "varchar", 0, 0);
 	args = append( append( append( append( new_exp_list(sql->sa),
 		exp_atom_str(sql->sa, t->s?t->s->base.name:NULL, &strtpe)),
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org