Changeset: 700573773140 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/700573773140
Modified Files:
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_result.c
        sql/backends/monet5/vaults/csv/csv.c
        sql/common/sql_types.c
        sql/server/rel_updates.c
        sql/server/sql_parser.y
Branch: default
Log Message:

Merge 'decimaldelimiters' into 'default'


diffs (truncated from 856 to 300 lines):

diff --git a/monetdb5/modules/mal/tablet.h b/monetdb5/modules/mal/tablet.h
--- a/monetdb5/modules/mal/tablet.h
+++ b/monetdb5/modules/mal/tablet.h
@@ -30,6 +30,8 @@ typedef struct Column_t {
        const char *name;                       /* column title */
        const char *sep;
        const char *rsep;
+       char decsep;
+       char decskip;
        int seplen;
        const char *type;
        int adt;                                        /* type index */
diff --git a/sql/backends/monet5/sql.c b/sql/backends/monet5/sql.c
--- a/sql/backends/monet5/sql.c
+++ b/sql/backends/monet5/sql.c
@@ -3140,6 +3140,8 @@ mvc_import_table_wrap(Client cntxt, MalB
        const char *fixed_widths = *getArgReference_str(stk, pci, pci->retc + 
9);
        int onclient = *getArgReference_int(stk, pci, pci->retc + 10);
        bool escape = *getArgReference_int(stk, pci, pci->retc + 11);
+       const char *decsep = *getArgReference_str(stk, pci, pci->retc + 12);
+       const char *decskip = *getArgReference_str(stk, pci, pci->retc + 13);
        str msg = MAL_SUCCEED;
        bstream *s = NULL;
        stream *ss;
@@ -3149,6 +3151,10 @@ mvc_import_table_wrap(Client cntxt, MalB
                return msg;
        if (onclient && !cntxt->filetrans)
                throw(MAL, "sql.copy_from", SQLSTATE(42000) "Cannot transfer 
files from client");
+       if (strNil(decsep))
+               throw(MAL, "sql.copy_from", SQLSTATE(42000) "decimal separator 
cannot be nil");
+       if (strNil(decskip))
+               decskip = NULL;
 
        be = cntxt->sqlcontext;
        /* The CSV parser expects ssep to have the value 0 if the user does not
@@ -3160,7 +3166,7 @@ mvc_import_table_wrap(Client cntxt, MalB
        if (strNil(fname))
                fname = NULL;
        if (fname == NULL) {
-               msg = mvc_import_table(cntxt, &b, be->mvc, be->mvc->scanner.rs, 
t, tsep, rsep, ssep, ns, sz, offset, besteffort, true, escape);
+               msg = mvc_import_table(cntxt, &b, be->mvc, be->mvc->scanner.rs, 
t, tsep, rsep, ssep, ns, sz, offset, besteffort, true, escape, decsep, decskip);
        } else {
                if (onclient) {
                        ss = mapi_request_upload(fname, false, 
be->mvc->scanner.rs, be->mvc->scanner.ws);
@@ -3218,7 +3224,7 @@ mvc_import_table_wrap(Client cntxt, MalB
                        close_stream(ss);
                        throw(MAL, "sql.copy_from", SQLSTATE(HY013) 
MAL_MALLOC_FAIL);
                }
-               msg = mvc_import_table(cntxt, &b, be->mvc, s, t, tsep, rsep, 
ssep, ns, sz, offset, besteffort, false, escape);
+               msg = mvc_import_table(cntxt, &b, be->mvc, s, t, tsep, rsep, 
ssep, ns, sz, offset, besteffort, false, escape, decsep, decskip);
                // This also closes ss:
                bstream_destroy(s);
        }
@@ -5314,7 +5320,7 @@ static mel_func sql_init_funcs[] = {
  pattern("sql", "export_bin_column", mvc_bin_export_column_wrap, true, "export 
column as binary", args(1, 5, arg("", lng), batargany("col", 1), 
arg("byteswap", bit), arg("filename", str), arg("onclient", int))),
  pattern("sql", "export_bin_column", mvc_bin_export_column_wrap, true, "export 
column as binary", args(1, 5, arg("", lng), argany("val", 1), arg("byteswap", 
bit), arg("filename", str), arg("onclient", int))),
  pattern("sql", "affectedRows", mvc_affected_rows_wrap, true, "export the 
number of affected rows by the current query", args(1,3, 
arg("",int),arg("mvc",int),arg("nr",lng))),
- pattern("sql", "copy_from", mvc_import_table_wrap, true, "Import a table from 
bstream s with the \ngiven tuple and seperators (sep/rsep)", args(1,13, 
batvarargany("",0),arg("t",ptr),arg("sep",str),arg("rsep",str),arg("ssep",str),arg("ns",str),arg("fname",str),arg("nr",lng),arg("offset",lng),arg("best",int),arg("fwf",str),arg("onclient",int),arg("escape",int))),
+ pattern("sql", "copy_from", mvc_import_table_wrap, true, "Import a table from 
bstream s with the \ngiven tuple and seperators (sep/rsep)", args(1,15, 
batvarargany("",0),arg("t",ptr),arg("sep",str),arg("rsep",str),arg("ssep",str),arg("ns",str),arg("fname",str),arg("nr",lng),arg("offset",lng),arg("best",int),arg("fwf",str),arg("onclient",int),arg("escape",int),arg("decsep",str),arg("decskip",str))),
  //we use bat.single now
  //pattern("sql", "single", CMDBATsingle, false, "", args(1,2, 
batargany("",2),argany("x",2))),
  pattern("sql", "importColumn", mvc_bin_import_column_wrap, false, "Import a 
column from the given file", args(2, 8, batargany("", 0),arg("", oid), 
arg("method",str),arg("width",int),arg("bswap",bit),arg("path",str),arg("onclient",int),arg("nrows",oid))),
diff --git a/sql/backends/monet5/sql_result.c b/sql/backends/monet5/sql_result.c
--- a/sql/backends/monet5/sql_result.c
+++ b/sql/backends/monet5/sql_result.c
@@ -317,7 +317,9 @@ bat_max_length(hge, hge)
                } else if (*s == '+'){                                          
                                        \
                        s++;                                                    
                                                        \
                }                                                               
                                                                \
-               for (i = 0; *s && *s != '.' && ((res == 0 && *s == '0') || i < 
t->digits - t->scale); s++) { \
+               for (i = 0; *s && *s != c->decsep && ((res == 0 && *s == '0') 
|| i < t->digits - t->scale); s++) { \
+                       if (c->decskip && *s == c->decskip)                     
                                \
+                               continue;                                       
                                                        \
                        if (!isdigit((unsigned char) *s))                       
                                \
                                break;                                          
                                                        \
                        res *= 10;                                              
                                                        \
@@ -325,12 +327,18 @@ bat_max_length(hge, hge)
                        if (res)                                                
                                                        \
                                i++;                                            
                                                        \
                }                                                               
                                                                \
-               if (*s == '.') {                                                
                                                \
+               if (*s == c->decsep) {                                          
                                        \
                        s++;                                                    
                                                        \
-                       while (*s && isdigit((unsigned char) *s) && scale > 0) 
{        \
-                               res *= 10;                                      
                                                        \
-                               res += *s++ - '0';                              
                                                \
-                               scale--;                                        
                                                        \
+                       while (*s && scale > 0) {                               
                                        \
+                               if (isdigit((unsigned char) *s)) {              
                                \
+                                       res *= 10;                              
                                                        \
+                                       res += *s++ - '0';                      
                                                \
+                                       scale--;                                
                                                        \
+                               } else if (c->decskip && *s == c->decskip) {    
                \
+                                       s++;                                    
                                                        \
+                               } else {                                        
                                                        \
+                                       break;                                  
                                                        \
+                               }                                               
                                                                \
                        }                                                       
                                                                \
                }                                                               
                                                                \
                while(*s && isspace((unsigned char) *s))                        
                        \
@@ -356,6 +364,8 @@ bat_max_length(hge, hge)
 static void *
 dec_frstr(Column *c, int type, const char *s)
 {
+       assert(c->decsep != '\0');
+
        /* support dec map to bte, sht, int and lng */
        if( strcmp(s,"nil")== 0)
                return NULL;
@@ -395,7 +405,11 @@ sec_frstr(Column *c, int type, const cha
                neg = 0;
                s++;
        }
-       for (i = 0; i < (19 - 3) && *s && *s != '.'; i++, s++) {
+       for (i = 0; i < (19 - 3) && *s && *s != c->decsep; i++, s++) {
+               if (c->decskip && *s == c->decskip) {
+                       i--;
+                       continue;
+               }
                if (!isdigit((unsigned char) *s))
                        return NULL;
                res *= 10;
@@ -403,10 +417,14 @@ sec_frstr(Column *c, int type, const cha
        }
        i = 0;
        if (*s) {
-               if (*s != '.')
+               if (*s != c->decsep)
                        return NULL;
                s++;
                for (; *s && i < 3; i++, s++) {
+                       if (c->decskip && *s == c->decskip) {
+                               i--;
+                               continue;
+                       }
                        if (!isdigit((unsigned char) *s))
                                return NULL;
                        res *= 10;
@@ -429,6 +447,64 @@ sec_frstr(Column *c, int type, const cha
        return (void *) r;
 }
 
+static void *
+fltdbl_frStr(Column *c, int type, const char *s)
+{
+       // Convert the text of a floating point field by handing it to the
+       // atomFromStr callback of BATatoms[type]. Returns c->data when that
+       // callback reports a positive length and NULL otherwise.
+       //
+       // The regular fltFromStr/dblFromStr functions do not take decimal commas
+       // and thousands separators into account. When these are in use, this
+       // function first converts them to decimal dots and empty strings,
+       // respectively. We use a fixed size buffer so abnormally long floats such
+       // as
+       // +00000000000000000000000000000000000000000000000000000000000000000000001.5e1
+       // will be rejected.
+       //
+       // According to Stack Overflow https://stackoverflow.com/questions/1701055/what-is-the-maximum-length-in-chars-needed-to-represent-any-double-value
+       // 24 bytes is a reasonable buffer but we'll make it a bit larger.
+       //
+       // The buffer lives at function scope on purpose: s is pointed at it
+       // below and is still read by atomFromStr after the conversion block has
+       // been left. Declaring it inside that block would end its lifetime
+       // before the read, which is undefined behaviour.
+       char tmp[120];
+
+       if (c->decskip || c->decsep != '.') {
+               char *p = &tmp[0];
+
+               // leading whitespace is not copied into the buffer
+               while (GDKisspace(*s))
+                       s++;
+               while (*s != '\0') {
+                       if (p >= tmp + sizeof(tmp) - 1) {
+                               // Only the byte reserved for the NUL is left.
+                               // If the input is this big it's probably an error.
+                               // Exception: only whitespace remains.
+                               while (GDKisspace(*s))
+                                       s++;
+                               if (*s == '\0') {
+                                       // there was only trailing whitespace
+                                       break;
+                               } else {
+                                       // not just trailing whitespace, abort!
+                                       return NULL;
+                               }
+                       }
+                       // ch is never NUL here, so a decskip of '\0' (meaning
+                       // "no skip character") can never match it.
+                       char ch = *s++;
+                       if (ch == c->decskip) {
+                               continue;
+                       } else if (ch == c->decsep) {
+                               ch = '.';
+                       } else if (ch == '.') {
+                               // We're mapping c->decsep to '.', if there are already
+                               // periods in the input we're losing information
+                               return NULL;
+                       }
+                       *p++ = ch;
+               }
+               // If we're here we either encountered the end of s or the buffer is
+               // full. In the latter case we still need to write the NUL.
+               // We left room for it.
+               *p = '\0';
+
+               // now process the converted text rather than the original
+               s = &tmp[0];
+       }
+
+       ssize_t len = (*BATatoms[type].atomFromStr) (s, &c->len, &c->data, false);
+       return (len > 0) ? c->data : NULL;
+}
+
 /* Literal parsing for SQL all pass through this routine */
 static void *
 _ASCIIadt_frStr(Column *c, int type, const char *s)
@@ -532,7 +608,7 @@ has_whitespace(const char *s)
 }
 
 str
-mvc_import_table(Client cntxt, BAT ***bats, mvc *m, bstream *bs, sql_table *t, 
const char *sep, const char *rsep, const char *ssep, const char *ns, lng sz, 
lng offset, int best, bool from_stdin, bool escape)
+mvc_import_table(Client cntxt, BAT ***bats, mvc *m, bstream *bs, sql_table *t, 
const char *sep, const char *rsep, const char *ssep, const char *ns, lng sz, 
lng offset, int best, bool from_stdin, bool escape, const char *decsep, const 
char *decskip)
 {
        int i = 0, j;
        node *n;
@@ -581,6 +657,8 @@ mvc_import_table(Client cntxt, BAT ***ba
                        fmt[i].sep = (n->next) ? sep : rsep;
                        fmt[i].rsep = rsep;
                        fmt[i].seplen = _strlen(fmt[i].sep);
+                       fmt[i].decsep = decsep[0],
+                       fmt[i].decskip = decskip != NULL ? decskip[0] : '\0',
                        fmt[i].type = sql_subtype_string(m->ta, &col->type);
                        fmt[i].adt = ATOMindex(col->type.type->impl);
                        fmt[i].tostr = &_ASCIIadt_toStr;
@@ -610,6 +688,9 @@ mvc_import_table(Client cntxt, BAT ***ba
                        } else if (col->type.type->eclass == EC_SEC) {
                                fmt[i].tostr = &dec_tostr;
                                fmt[i].frstr = &sec_frstr;
+                       } else if (col->type.type->eclass == EC_FLT) {
+                               // no need to override .tostr, only .frstr
+                               fmt[i].frstr = &fltdbl_frStr;
                        }
                        fmt[i].size = ATOMsize(fmt[i].adt);
                }
diff --git a/sql/backends/monet5/sql_result.h b/sql/backends/monet5/sql_result.h
--- a/sql/backends/monet5/sql_result.h
+++ b/sql/backends/monet5/sql_result.h
@@ -31,7 +31,7 @@ extern int mvc_export_bin_chunk(backend 
 
 extern int mvc_export_prepare(backend *b, stream *s);
 
-extern str mvc_import_table(Client cntxt, BAT ***bats, mvc *c, bstream *s, 
sql_table *t, const char *sep, const char *rsep, const char *ssep, const char 
*ns, lng nr, lng offset, int best, bool from_stdin, bool escape);
+extern str mvc_import_table(Client cntxt, BAT ***bats, mvc *c, bstream *s, 
sql_table *t, const char *sep, const char *rsep, const char *ssep, const char 
*ns, lng nr, lng offset, int best, bool from_stdin, bool escape, const char 
*decsep, const char *decskip);
 sql5_export int mvc_result_table(backend *be, oid query_id, int nr_cols, 
mapi_query_t type);
 
 sql5_export int mvc_result_column(backend *be, const char *tn, const char 
*name, const char *typename, int digits, int scale, BAT *b);
diff --git a/sql/backends/monet5/vaults/csv/csv.c 
b/sql/backends/monet5/vaults/csv/csv.c
--- a/sql/backends/monet5/vaults/csv/csv.c
+++ b/sql/backends/monet5/vaults/csv/csv.c
@@ -488,7 +488,7 @@ csv_load(void *BE, sql_subfunc *f, char 
        /* (res bats) := import(table T, 'delimit', '\n', 'quote', str:nil, 
fname, lng:nil, 0/1, 0, str:nil, int:nil, * int:nil ); */
 
        /* lookup copy_from */
-       sql_subfunc *cf = sql_find_func(sql, "sys", "copyfrom", 12, F_UNION, 
true, NULL);
+       sql_subfunc *cf = sql_find_func(sql, "sys", "copyfrom", 14, F_UNION, 
true, NULL);
        cf->res = f->res;
 
        sql_subtype tpe;
@@ -506,27 +506,27 @@ csv_load(void *BE, sql_subfunc *f, char 
                rsep[0] = '\n';
                rsep[1] = 0;
        }
-       list *args = append( append( append( append( append( 
new_exp_list(sql->sa),
-       exp_atom_ptr(sql->sa, t)),
-       exp_atom_str(sql->sa, tsep, &tpe)),
-       exp_atom_str(sql->sa, rsep, &tpe)),
-       exp_atom_str(sql->sa, ssep, &tpe)),
-       exp_atom_str(sql->sa, "", &tpe));
+       list *args = new_exp_list(sql->sa);
+
+       append(args, exp_atom_ptr(sql->sa, t));
+       append(args, exp_atom_str(sql->sa, tsep, &tpe));
+       append(args, exp_atom_str(sql->sa, rsep, &tpe));
+       append(args, exp_atom_str(sql->sa, ssep, &tpe));
 
-       append( args, exp_atom_str(sql->sa, filename, &tpe));
-       sql_exp *import = exp_op(sql->sa,
-               append(
-                       append(
-                           append(
-                               append(
-                                   append(
-                                       append(args, topn?topn:
-                                              exp_atom_lng(sql->sa, -1)),
-                                       exp_atom_lng(sql->sa, 
r->has_header?2:1)),
-                                   exp_atom_int(sql->sa, 0)),
-                               exp_atom_str(sql->sa, NULL, &tpe)),
-                           exp_atom_int(sql->sa, 0)),
-                       exp_atom_int(sql->sa, 0)), cf);
+       append(args, exp_atom_str(sql->sa, "", &tpe));
+       append(args, exp_atom_str(sql->sa, filename, &tpe));
+       append(args, topn ? topn: exp_atom_lng(sql->sa, -1));
+       append(args, exp_atom_lng(sql->sa, r->has_header?2:1));
+
+       append(args, exp_atom_int(sql->sa, 0));
+       append(args, exp_atom_str(sql->sa, NULL, &tpe));
+       append(args, exp_atom_int(sql->sa, 0));
+       append(args, exp_atom_int(sql->sa, 0));
+
+       append(args, exp_atom_str(sql->sa, ".", &tpe));
+       append(args, exp_atom_str(sql->sa, NULL, &tpe));
+
+       sql_exp *import = exp_op(sql->sa, args, cf);
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to