Changeset: 57668aada017 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/57668aada017
Modified Files:
        sql/backends/monet5/rel_bin.c
        sql/backends/monet5/vaults/csv/csv.c
        sql/server/rel_select.c
        sql/test/file_loader/Tests/file_loader_function.test
        sql/test/file_loader/Tests/file_loader_string.test
Branch: Dec2023
Log Message:

small fixes in the file loader:

on unknown file extensions fall back to the csv loader
handle a leading - (and +) in bigint and decimal detection
when the last column has type CSV_NULL, assume a combined tuple/record separator.


diffs (156 lines):

diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c
--- a/sql/backends/monet5/rel_bin.c
+++ b/sql/backends/monet5/rel_bin.c
@@ -1362,6 +1362,8 @@ exp2bin_file_loader(backend *be, sql_exp
 
        file_loader_t *fl = fl_find(ext);
        if (!fl)
+               fl = fl_find("csv");
+       if (!fl)
                return NULL;
        sql_exp *fexp = arg_list->h->data;
        assert(is_atom(fexp->type));
diff --git a/sql/backends/monet5/vaults/csv/csv.c b/sql/backends/monet5/vaults/csv/csv.c
--- a/sql/backends/monet5/vaults/csv/csv.c
+++ b/sql/backends/monet5/vaults/csv/csv.c
@@ -189,6 +189,8 @@ detect_bool(const char *s, const char *e
 static bool
 detect_bigint(const char *s, const char *e)
 {
+       if (s[0] == '-' || s[0] == '+')
+               s++;
        while(s < e) {
                if (!isdigit(*s))
                        break;
@@ -204,6 +206,8 @@ detect_decimal(const char *s, const char
 {
        int dotseen = 0;
 
+       if (s[0] == '-' || s[0] == '+')
+               s++;
        while(s < e) {
                if (!dotseen && *s == '.')
                        dotseen = (int)(e-(s+1));
@@ -371,6 +375,7 @@ typedef struct csv_t {
        char quote;
        char delim;
        bool has_header;
+       bool extra_tsep;
 } csv_t;
 
 /*
@@ -402,7 +407,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c
        if (l<0)
                return RUNTIME_LOAD_ERROR;
        buf[l] = 0;
-       bool has_header = false;
+       bool has_header = false, extra_tsep = false;
        int nr_fields = 0;
        char q = detect_quote(buf);
        char d = detect_delimiter(buf, q, &nr_fields);
@@ -425,13 +430,20 @@ csv_relation(mvc *sql, sql_subfunc *f, c
                        sql_subtype *t = (types[col].type == CSV_DECIMAL)?
                                        sql_bind_subtype(sql->sa, st, 18, 
types[col].scale):
                                        sql_bind_subtype(sql->sa, st,  0, 
types[col].scale);
-
-                       list_append(typelist, t);
-                       list_append(res_exps, exp_column(sql->sa, NULL, name, 
t, CARD_MULTI, 1, 0, 0));
+                       if (!t && (col+1) == nr_fields && types[col].type == 
CSV_NULL) {
+                               nr_fields--;
+                               extra_tsep = true;
+                       } else if (t) {
+                               list_append(typelist, t);
+                               list_append(res_exps, exp_column(sql->sa, NULL, 
name, t, CARD_MULTI, 1, 0, 0));
+                       } else {
+                               GDKfree(types);
+                               throw(SQL, SQLSTATE(42000), "csv" "type %s not 
found\n", st);
+                       }
                } else {
                        /* shouldn't be possible, we fallback to strings */
                        GDKfree(types);
-                       assert(0);
+                       throw(SQL, SQLSTATE(42000), "csv" "type unknown\n");
                }
        }
        GDKfree(types);
@@ -443,6 +455,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c
        r->sname[0] = 0;
        r->quote = q;
        r->delim = d;
+       r->extra_tsep = extra_tsep;
        r->has_header = has_header;
        f->sname = (char*)r; /* pass schema++ */
        return MAL_SUCCEED;
@@ -467,7 +480,7 @@ csv_load(void *BE, sql_subfunc *f, char 
                sql_subtype *tp = tn->data;
                sql_column *c = NULL;
 
-               if (mvc_create_column(&c, be->mvc, t, name, tp) != LOG_OK) {
+               if (!tp || mvc_create_column(&c, be->mvc, t, name, tp) != 
LOG_OK) {
                        //throw(SQL, SQLSTATE(42000), "csv" RUNTIME_LOAD_ERROR);
                        return NULL;
                }
@@ -480,15 +493,23 @@ csv_load(void *BE, sql_subfunc *f, char 
 
        sql_subtype tpe;
        sql_find_subtype(&tpe, "varchar", 0, 0);
-       char tsep[2], ssep[2];
+       char tsep[2], rsep[3], ssep[2];
        tsep[0] = r->delim;
        tsep[1] = 0;
        ssep[0] = r->quote;
        ssep[1] = 0;
+       if (r->extra_tsep) {
+               rsep[0] = r->delim;
+               rsep[1] = '\n';
+               rsep[2] = 0;
+       } else {
+               rsep[0] = '\n';
+               rsep[1] = 0;
+       }
        list *args = append( append( append( append( append( 
new_exp_list(sql->sa),
        exp_atom_ptr(sql->sa, t)),
        exp_atom_str(sql->sa, tsep, &tpe)),
-       exp_atom_str(sql->sa, "\n", &tpe)),
+       exp_atom_str(sql->sa, rsep, &tpe)),
        exp_atom_str(sql->sa, ssep, &tpe)),
        exp_atom_str(sql->sa, "", &tpe));
 
diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c
--- a/sql/server/rel_select.c
+++ b/sql/server/rel_select.c
@@ -563,7 +563,9 @@ file_loader_add_table_column_types(mvc *
                        *d = 0;
                        fl = fl_find(ext);
                }
-               if (!fl)
+               if (!fl) /* fallback */
+                       fl = fl_find("csv");
+               if (!fl) /* not expected */
                        return sa_message(sql->ta, "Filename extension '%s' 
missing", ext?ext:"");
        }
        str err = fl->add_types(sql, f, filename, res_exps, tname);
diff --git a/sql/test/file_loader/Tests/file_loader_function.test b/sql/test/file_loader/Tests/file_loader_function.test
--- a/sql/test/file_loader/Tests/file_loader_function.test
+++ b/sql/test/file_loader/Tests/file_loader_function.test
@@ -7,7 +7,7 @@ select * from file_loader('FileNotFound'
 statement error 42000!SELECT: file_loader function failed 'File not found'
 select * from file_loader('/tmp/FileNotFound.csv')
 
-statement error 42000!SELECT: file_loader function failed 'Filename extension 
'gz' missing'
+statement error 42000!SELECT: file_loader function failed 'File not found'
 select * from file_loader('/tmp/FileNotFound.gz')
 
 statement error 42000!SELECT: file_loader function failed 'File not found'
diff --git a/sql/test/file_loader/Tests/file_loader_string.test b/sql/test/file_loader/Tests/file_loader_string.test
--- a/sql/test/file_loader/Tests/file_loader_string.test
+++ b/sql/test/file_loader/Tests/file_loader_string.test
@@ -10,7 +10,7 @@ select * from '/tmp/FileNotFound'
 statement error 42000!SELECT: file_loader function failed 'File not found'
 select * from '/tmp/FileNotFound.csv'
 
-statement error 42000!SELECT: file_loader function failed 'Filename extension 
'gz' missing'
+statement error 42000!SELECT: file_loader function failed 'File not found'
 select * from '/tmp/FileNotFound.gz'
 
 statement error 42000!SELECT: file_loader function failed 'File not found'
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to