Changeset: 2402505ea2a8 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/2402505ea2a8 Modified Files: sql/backends/monet5/sql.c sql/backends/monet5/sql_result.c sql/test/copy/Tests/decimal_separators.test Branch: default Log Message:
Implement DECIMAL AS for floats diffs (205 lines): diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c --- a/sql/backends/monet5/sql.c +++ b/sql/backends/monet5/sql.c @@ -3123,6 +3123,10 @@ mvc_import_table_wrap(Client cntxt, MalB return msg; if (onclient && !cntxt->filetrans) throw(MAL, "sql.copy_from", SQLSTATE(42000) "Cannot transfer files from client"); + if (strNil(decsep)) + throw(MAL, "sql.copy_from", SQLSTATE(42000) "decimal separator cannot be nil"); + if (strNil(decskip)) + decskip = NULL; be = cntxt->sqlcontext; /* The CSV parser expects ssep to have the value 0 if the user does not diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c --- a/sql/backends/monet5/sql_result.c +++ b/sql/backends/monet5/sql_result.c @@ -447,6 +447,64 @@ sec_frstr(Column *c, int type, const cha return (void *) r; } +static void * +fltdbl_frStr(Column *c, int type, const char *s) +{ + // The regular fltFromStr/dblFromStr functions do not take decimal commas + // and thousands separators into account. When these are in use, this + // function first converts them to decimal dots and empty strings, + // respectively. We use a fixed size buffer so abnormally long floats such + // as + // +00000000000000000000000000000000000000000000000000000000000000000000001.5e1 + // will be rejected. + + if (c->decskip || c->decsep != '.') { + // According to Stack Overflow https://stackoverflow.com/questions/1701055/what-is-the-maximum-length-in-chars-needed-to-represent-any-double-value + // 24 bytes is a reasonable buffer but we'll make it a bit larger. + char tmp[120]; + char *p = &tmp[0]; + + while (GDKisspace(*s)) + s++; + while (*s != '\0') { + if (p >= tmp + sizeof(tmp) - 1) { + // If the input is this big it's probably an error. + // Exception: only whitespace remains. + while (GDKisspace(*s)) + s++; + if (*s == '\0') { + // there was only trailing whitespace + break; + } else { + // not just trailing whitespace, abort! + return NULL; + } + } + char ch = *s++; + if (ch == c->decskip) { + continue; + } else if (ch == c->decsep) { + ch = '.'; + } else if (ch == '.') { + // We're mapping c->decsep to '.', if there are already + // periods in the input we're losing information + return NULL; + } + *p++ = ch; + } + // If we're here either we either encountered the end of s or the buffer is + // full. In the latter case we still need to write the NUL. + // We left room for it. + *p = '\0'; + + // now process the converted text rather than the original + s = &tmp[0]; + } + + ssize_t len = (*BATatoms[type].atomFromStr) (s, &c->len, &c->data, false); + return (len > 0) ? c->data : NULL; +} + /* Literal parsing for SQL all pass through this routine */ static void * _ASCIIadt_frStr(Column *c, int type, const char *s) @@ -599,8 +657,8 @@ mvc_import_table(Client cntxt, BAT ***ba fmt[i].sep = (n->next) ? sep : rsep; fmt[i].rsep = rsep; fmt[i].seplen = _strlen(fmt[i].sep); - fmt[i].decsep = '\0', - fmt[i].decskip = '\0', + fmt[i].decsep = decsep[0], + fmt[i].decskip = decskip != NULL ? decskip[0] : '\0', fmt[i].type = sql_subtype_string(m->ta, &col->type); fmt[i].adt = ATOMindex(col->type.type->impl); fmt[i].tostr = &_ASCIIadt_toStr; @@ -627,13 +685,12 @@ mvc_import_table(Client cntxt, BAT ***ba if (col->type.type->eclass == EC_DEC) { fmt[i].tostr = &dec_tostr; fmt[i].frstr = &dec_frstr; - fmt[i].decsep = decsep[0]; // apply DECIMAL DELIMITERS clause - fmt[i].decskip = decskip[0]; } else if (col->type.type->eclass == EC_SEC) { fmt[i].tostr = &dec_tostr; fmt[i].frstr = &sec_frstr; - fmt[i].decsep = decsep[0]; // apply DECIMAL DELIMITERS clause - fmt[i].decskip = decskip[0]; + } else if (col->type.type->eclass == EC_FLT) { + // no need to override .tostr, only .frstr + fmt[i].frstr = &fltdbl_frStr; } fmt[i].size = ATOMsize(fmt[i].adt); } diff --git a/sql/test/copy/Tests/decimal_separators.test b/sql/test/copy/Tests/decimal_separators.test --- a/sql/test/copy/Tests/decimal_separators.test +++ b/sql/test/copy/Tests/decimal_separators.test @@ -10,6 +10,15 @@ CREATE TABLE secs(id INT, i INTERVAL SEC statement ok CREATE TABLE days(id INT, i INTERVAL DAY) +statement ok +CREATE TABLE floats(id INT, r REAL) + +statement ok +CREATE TABLE doubles(id INT, d DOUBLE) + +statement ok +SAVEPOINT tablescreated; + -- -- this is the default behavior -- @@ -163,3 +172,75 @@ 21 statement ok DELETE FROM days + +-- +-- doubles and reals, default behavior +-- + +statement ok +COPY 2 RECORDS INTO floats FROM STDIN +<COPY_INTO_DATA> +1|-1.0e3 +2| +00000000000000000000000000000000001.5e1 + +query T +SELECT r FROM floats ORDER BY id +---- +-1000.0 +15.0 + +statement ok +DELETE FROM floats + + +statement ok +COPY 2 RECORDS INTO doubles FROM STDIN +<COPY_INTO_DATA> +1|-1.0e3 +2| +00000000000000000000000000000000001.5e1 + +query T +SELECT d FROM doubles ORDER BY id +---- +-1000.0 +15.0 + +statement ok +DELETE FROM doubles + + +-- +-- doubles and reals, behavior with decimal comma and thousands separator +-- + +statement ok +COPY 2 RECORDS INTO floats FROM STDIN DECIMAL AS ',', '_' +<COPY_INTO_DATA> +1|-1,0e3 +2| +0000000000000_0000__000000000000000001,5e1 + +query T +SELECT r FROM floats ORDER BY id +---- +-1000.0 +15.0 + +statement ok +DELETE FROM floats + + +statement ok +COPY 2 RECORDS INTO doubles FROM STDIN DECIMAL AS ',', '_' +<COPY_INTO_DATA> +1|-1,0e3 +2| +0000000000000_0000__000000000000000001,5e1 + +query T +SELECT d FROM doubles ORDER BY id +---- +-1000.0 +15.0 + +statement ok +DELETE FROM doubles + _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org