Changeset: 700573773140 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/700573773140
Modified Files:
	sql/backends/monet5/sql.c
	sql/backends/monet5/sql_result.c
	sql/backends/monet5/vaults/csv/csv.c
	sql/common/sql_types.c
	sql/server/rel_updates.c
	sql/server/sql_parser.y
Branch: default
Log Message:
Merge 'decimaldelimiters' into 'default' diffs (truncated from 856 to 300 lines): diff --git a/monetdb5/modules/mal/tablet.h b/monetdb5/modules/mal/tablet.h --- a/monetdb5/modules/mal/tablet.h +++ b/monetdb5/modules/mal/tablet.h @@ -30,6 +30,8 @@ typedef struct Column_t { const char *name; /* column title */ const char *sep; const char *rsep; + char decsep; + char decskip; int seplen; const char *type; int adt; /* type index */ diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -3140,6 +3140,8 @@ mvc_import_table_wrap(Client cntxt, MalB const char *fixed_widths = *getArgReference_str(stk, pci, pci->retc + 9); int onclient = *getArgReference_int(stk, pci, pci->retc + 10); bool escape = *getArgReference_int(stk, pci, pci->retc + 11); + const char *decsep = *getArgReference_str(stk, pci, pci->retc + 12); + const char *decskip = *getArgReference_str(stk, pci, pci->retc + 13); str msg = MAL_SUCCEED; bstream *s = NULL; stream *ss; @@ -3149,6 +3151,10 @@ mvc_import_table_wrap(Client cntxt, MalB return msg; if (onclient && !cntxt->filetrans) throw(MAL, "sql.copy_from", SQLSTATE(42000) "Cannot transfer files from client"); + if (strNil(decsep)) + throw(MAL, "sql.copy_from", SQLSTATE(42000) "decimal separator cannot be nil"); + if (strNil(decskip)) + decskip = NULL; be = cntxt->sqlcontext; /* The CSV parser expects ssep to have the value 0 if the user does not @@ -3160,7 +3166,7 @@ mvc_import_table_wrap(Client cntxt, MalB if (strNil(fname)) fname = NULL; if (fname == NULL) { - msg = mvc_import_table(cntxt, &b, be->mvc, be->mvc->scanner.rs, t, tsep, rsep, ssep, ns, sz, offset, besteffort, true, escape); + msg = mvc_import_table(cntxt, &b, be->mvc, be->mvc->scanner.rs, t, tsep, rsep, ssep, ns, sz, offset, besteffort, true, escape, decsep, decskip); } else { if (onclient) { ss = mapi_request_upload(fname, false, be->mvc->scanner.rs, be->mvc->scanner.ws); @@ -3218,7 +3224,7 @@ 
mvc_import_table_wrap(Client cntxt, MalB close_stream(ss); throw(MAL, "sql.copy_from", SQLSTATE(HY013) MAL_MALLOC_FAIL); } - msg = mvc_import_table(cntxt, &b, be->mvc, s, t, tsep, rsep, ssep, ns, sz, offset, besteffort, false, escape); + msg = mvc_import_table(cntxt, &b, be->mvc, s, t, tsep, rsep, ssep, ns, sz, offset, besteffort, false, escape, decsep, decskip); // This also closes ss: bstream_destroy(s); } @@ -5314,7 +5320,7 @@ static mel_func sql_init_funcs[] = { pattern("sql", "export_bin_column", mvc_bin_export_column_wrap, true, "export column as binary", args(1, 5, arg("", lng), batargany("col", 1), arg("byteswap", bit), arg("filename", str), arg("onclient", int))), pattern("sql", "export_bin_column", mvc_bin_export_column_wrap, true, "export column as binary", args(1, 5, arg("", lng), argany("val", 1), arg("byteswap", bit), arg("filename", str), arg("onclient", int))), pattern("sql", "affectedRows", mvc_affected_rows_wrap, true, "export the number of affected rows by the current query", args(1,3, arg("",int),arg("mvc",int),arg("nr",lng))), - pattern("sql", "copy_from", mvc_import_table_wrap, true, "Import a table from bstream s with the \ngiven tuple and seperators (sep/rsep)", args(1,13, batvarargany("",0),arg("t",ptr),arg("sep",str),arg("rsep",str),arg("ssep",str),arg("ns",str),arg("fname",str),arg("nr",lng),arg("offset",lng),arg("best",int),arg("fwf",str),arg("onclient",int),arg("escape",int))), + pattern("sql", "copy_from", mvc_import_table_wrap, true, "Import a table from bstream s with the \ngiven tuple and seperators (sep/rsep)", args(1,15, batvarargany("",0),arg("t",ptr),arg("sep",str),arg("rsep",str),arg("ssep",str),arg("ns",str),arg("fname",str),arg("nr",lng),arg("offset",lng),arg("best",int),arg("fwf",str),arg("onclient",int),arg("escape",int),arg("decsep",str),arg("decskip",str))), //we use bat.single now //pattern("sql", "single", CMDBATsingle, false, "", args(1,2, batargany("",2),argany("x",2))), pattern("sql", "importColumn", 
mvc_bin_import_column_wrap, false, "Import a column from the given file", args(2, 8, batargany("", 0),arg("", oid), arg("method",str),arg("width",int),arg("bswap",bit),arg("path",str),arg("onclient",int),arg("nrows",oid))), diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c --- a/sql/backends/monet5/sql_result.c +++ b/sql/backends/monet5/sql_result.c @@ -317,7 +317,9 @@ bat_max_length(hge, hge) } else if (*s == '+'){ \ s++; \ } \ - for (i = 0; *s && *s != '.' && ((res == 0 && *s == '0') || i < t->digits - t->scale); s++) { \ + for (i = 0; *s && *s != c->decsep && ((res == 0 && *s == '0') || i < t->digits - t->scale); s++) { \ + if (c->decskip && *s == c->decskip) \ + continue; \ if (!isdigit((unsigned char) *s)) \ break; \ res *= 10; \ @@ -325,12 +327,18 @@ bat_max_length(hge, hge) if (res) \ i++; \ } \ - if (*s == '.') { \ + if (*s == c->decsep) { \ s++; \ - while (*s && isdigit((unsigned char) *s) && scale > 0) { \ - res *= 10; \ - res += *s++ - '0'; \ - scale--; \ + while (*s && scale > 0) { \ + if (isdigit((unsigned char) *s)) { \ + res *= 10; \ + res += *s++ - '0'; \ + scale--; \ + } else if (c->decskip && *s == c->decskip) { \ + s++; \ + } else { \ + break; \ + } \ } \ } \ while(*s && isspace((unsigned char) *s)) \ @@ -356,6 +364,8 @@ bat_max_length(hge, hge) static void * dec_frstr(Column *c, int type, const char *s) { + assert(c->decsep != '\0'); + /* support dec map to bte, sht, int and lng */ if( strcmp(s,"nil")== 0) return NULL; @@ -395,7 +405,11 @@ sec_frstr(Column *c, int type, const cha neg = 0; s++; } - for (i = 0; i < (19 - 3) && *s && *s != '.'; i++, s++) { + for (i = 0; i < (19 - 3) && *s && *s != c->decsep; i++, s++) { + if (c->decskip && *s == c->decskip) { + i--; + continue; + } if (!isdigit((unsigned char) *s)) return NULL; res *= 10; @@ -403,10 +417,14 @@ sec_frstr(Column *c, int type, const cha } i = 0; if (*s) { - if (*s != '.') + if (*s != c->decsep) return NULL; s++; for (; *s && i < 3; i++, s++) { + if 
(c->decskip && *s == c->decskip) { + i--; + continue; + } if (!isdigit((unsigned char) *s)) return NULL; res *= 10; @@ -429,6 +447,64 @@ sec_frstr(Column *c, int type, const cha return (void *) r; } +static void * +fltdbl_frStr(Column *c, int type, const char *s) +{ + // The regular fltFromStr/dblFromStr functions do not take decimal commas + // and thousands separators into account. When these are in use, this + // function first converts them to decimal dots and empty strings, + // respectively. We use a fixed size buffer so abnormally long floats such + // as + // +00000000000000000000000000000000000000000000000000000000000000000000001.5e1 + // will be rejected. + + if (c->decskip || c->decsep != '.') { + // According to Stack Overflow https://stackoverflow.com/questions/1701055/what-is-the-maximum-length-in-chars-needed-to-represent-any-double-value + // 24 bytes is a reasonable buffer but we'll make it a bit larger. + char tmp[120]; + char *p = &tmp[0]; + + while (GDKisspace(*s)) + s++; + while (*s != '\0') { + if (p >= tmp + sizeof(tmp) - 1) { + // If the input is this big it's probably an error. + // Exception: only whitespace remains. + while (GDKisspace(*s)) + s++; + if (*s == '\0') { + // there was only trailing whitespace + break; + } else { + // not just trailing whitespace, abort! + return NULL; + } + } + char ch = *s++; + if (ch == c->decskip) { + continue; + } else if (ch == c->decsep) { + ch = '.'; + } else if (ch == '.') { + // We're mapping c->decsep to '.', if there are already + // periods in the input we're losing information + return NULL; + } + *p++ = ch; + } + // If we're here either we either encountered the end of s or the buffer is + // full. In the latter case we still need to write the NUL. + // We left room for it. + *p = '\0'; + + // now process the converted text rather than the original + s = &tmp[0]; + } + + ssize_t len = (*BATatoms[type].atomFromStr) (s, &c->len, &c->data, false); + return (len > 0) ? 
c->data : NULL; +} + /* Literal parsing for SQL all pass through this routine */ static void * _ASCIIadt_frStr(Column *c, int type, const char *s) @@ -532,7 +608,7 @@ has_whitespace(const char *s) } str -mvc_import_table(Client cntxt, BAT ***bats, mvc *m, bstream *bs, sql_table *t, const char *sep, const char *rsep, const char *ssep, const char *ns, lng sz, lng offset, int best, bool from_stdin, bool escape) +mvc_import_table(Client cntxt, BAT ***bats, mvc *m, bstream *bs, sql_table *t, const char *sep, const char *rsep, const char *ssep, const char *ns, lng sz, lng offset, int best, bool from_stdin, bool escape, const char *decsep, const char *decskip) { int i = 0, j; node *n; @@ -581,6 +657,8 @@ mvc_import_table(Client cntxt, BAT ***ba fmt[i].sep = (n->next) ? sep : rsep; fmt[i].rsep = rsep; fmt[i].seplen = _strlen(fmt[i].sep); + fmt[i].decsep = decsep[0], + fmt[i].decskip = decskip != NULL ? decskip[0] : '\0', fmt[i].type = sql_subtype_string(m->ta, &col->type); fmt[i].adt = ATOMindex(col->type.type->impl); fmt[i].tostr = &_ASCIIadt_toStr; @@ -610,6 +688,9 @@ mvc_import_table(Client cntxt, BAT ***ba } else if (col->type.type->eclass == EC_SEC) { fmt[i].tostr = &dec_tostr; fmt[i].frstr = &sec_frstr; + } else if (col->type.type->eclass == EC_FLT) { + // no need to override .tostr, only .frstr + fmt[i].frstr = &fltdbl_frStr; } fmt[i].size = ATOMsize(fmt[i].adt); } diff --git a/sql/backends/monet5/sql_result.h b/sql/backends/monet5/sql_result.h --- a/sql/backends/monet5/sql_result.h +++ b/sql/backends/monet5/sql_result.h @@ -31,7 +31,7 @@ extern int mvc_export_bin_chunk(backend extern int mvc_export_prepare(backend *b, stream *s); -extern str mvc_import_table(Client cntxt, BAT ***bats, mvc *c, bstream *s, sql_table *t, const char *sep, const char *rsep, const char *ssep, const char *ns, lng nr, lng offset, int best, bool from_stdin, bool escape); +extern str mvc_import_table(Client cntxt, BAT ***bats, mvc *c, bstream *s, sql_table *t, const char *sep, const char 
*rsep, const char *ssep, const char *ns, lng nr, lng offset, int best, bool from_stdin, bool escape, const char *decsep, const char *decskip); sql5_export int mvc_result_table(backend *be, oid query_id, int nr_cols, mapi_query_t type); sql5_export int mvc_result_column(backend *be, const char *tn, const char *name, const char *typename, int digits, int scale, BAT *b); diff --git a/sql/backends/monet5/vaults/csv/csv.c b/sql/backends/monet5/vaults/csv/csv.c --- a/sql/backends/monet5/vaults/csv/csv.c +++ b/sql/backends/monet5/vaults/csv/csv.c @@ -488,7 +488,7 @@ csv_load(void *BE, sql_subfunc *f, char /* (res bats) := import(table T, 'delimit', '\n', 'quote', str:nil, fname, lng:nil, 0/1, 0, str:nil, int:nil, * int:nil ); */ /* lookup copy_from */ - sql_subfunc *cf = sql_find_func(sql, "sys", "copyfrom", 12, F_UNION, true, NULL); + sql_subfunc *cf = sql_find_func(sql, "sys", "copyfrom", 14, F_UNION, true, NULL); cf->res = f->res; sql_subtype tpe; @@ -506,27 +506,27 @@ csv_load(void *BE, sql_subfunc *f, char rsep[0] = '\n'; rsep[1] = 0; } - list *args = append( append( append( append( append( new_exp_list(sql->sa), - exp_atom_ptr(sql->sa, t)), - exp_atom_str(sql->sa, tsep, &tpe)), - exp_atom_str(sql->sa, rsep, &tpe)), - exp_atom_str(sql->sa, ssep, &tpe)), - exp_atom_str(sql->sa, "", &tpe)); + list *args = new_exp_list(sql->sa); + + append(args, exp_atom_ptr(sql->sa, t)); + append(args, exp_atom_str(sql->sa, tsep, &tpe)); + append(args, exp_atom_str(sql->sa, rsep, &tpe)); + append(args, exp_atom_str(sql->sa, ssep, &tpe)); - append( args, exp_atom_str(sql->sa, filename, &tpe)); - sql_exp *import = exp_op(sql->sa, - append( - append( - append( - append( - append( - append(args, topn?topn: - exp_atom_lng(sql->sa, -1)), - exp_atom_lng(sql->sa, r->has_header?2:1)), - exp_atom_int(sql->sa, 0)), - exp_atom_str(sql->sa, NULL, &tpe)), - exp_atom_int(sql->sa, 0)), - exp_atom_int(sql->sa, 0)), cf); + append(args, exp_atom_str(sql->sa, "", &tpe)); + append(args, 
exp_atom_str(sql->sa, filename, &tpe)); + append(args, topn ? topn: exp_atom_lng(sql->sa, -1)); + append(args, exp_atom_lng(sql->sa, r->has_header?2:1)); + + append(args, exp_atom_int(sql->sa, 0)); + append(args, exp_atom_str(sql->sa, NULL, &tpe)); + append(args, exp_atom_int(sql->sa, 0)); + append(args, exp_atom_int(sql->sa, 0)); + + append(args, exp_atom_str(sql->sa, ".", &tpe)); + append(args, exp_atom_str(sql->sa, NULL, &tpe)); + + sql_exp *import = exp_op(sql->sa, args, cf); _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org