Changeset: 57668aada017 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/57668aada017 Modified Files: sql/backends/monet5/rel_bin.c sql/backends/monet5/vaults/csv/csv.c sql/server/rel_select.c sql/test/file_loader/Tests/file_loader_function.test sql/test/file_loader/Tests/file_loader_string.test Branch: Dec2023 Log Message:
Small fixes in the file loader: on unknown file extensions, fall back to the csv loader; handle - (and +) in decimal detection; when the last column has type CSV_NULL, assume a combined tuple/record separator. diffs (156 lines): diff --git a/sql/backends/monet5/rel_bin.c b/sql/backends/monet5/rel_bin.c --- a/sql/backends/monet5/rel_bin.c +++ b/sql/backends/monet5/rel_bin.c @@ -1362,6 +1362,8 @@ exp2bin_file_loader(backend *be, sql_exp file_loader_t *fl = fl_find(ext); if (!fl) + fl = fl_find("csv"); + if (!fl) return NULL; sql_exp *fexp = arg_list->h->data; assert(is_atom(fexp->type)); diff --git a/sql/backends/monet5/vaults/csv/csv.c b/sql/backends/monet5/vaults/csv/csv.c --- a/sql/backends/monet5/vaults/csv/csv.c +++ b/sql/backends/monet5/vaults/csv/csv.c @@ -189,6 +189,8 @@ detect_bool(const char *s, const char *e static bool detect_bigint(const char *s, const char *e) { + if (s[0] == '-' || s[0] == '+') + s++; while(s < e) { if (!isdigit(*s)) break; @@ -204,6 +206,8 @@ detect_decimal(const char *s, const char { int dotseen = 0; + if (s[0] == '-' || s[0] == '+') + s++; while(s < e) { if (!dotseen && *s == '.') dotseen = (int)(e-(s+1)); @@ -371,6 +375,7 @@ typedef struct csv_t { char quote; char delim; bool has_header; + bool extra_tsep; } csv_t; /* @@ -402,7 +407,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c if (l<0) return RUNTIME_LOAD_ERROR; buf[l] = 0; - bool has_header = false; + bool has_header = false, extra_tsep = false; int nr_fields = 0; char q = detect_quote(buf); char d = detect_delimiter(buf, q, &nr_fields); @@ -425,13 +430,20 @@ csv_relation(mvc *sql, sql_subfunc *f, c sql_subtype *t = (types[col].type == CSV_DECIMAL)? 
sql_bind_subtype(sql->sa, st, 18, types[col].scale): sql_bind_subtype(sql->sa, st, 0, types[col].scale); - - list_append(typelist, t); - list_append(res_exps, exp_column(sql->sa, NULL, name, t, CARD_MULTI, 1, 0, 0)); + if (!t && (col+1) == nr_fields && types[col].type == CSV_NULL) { + nr_fields--; + extra_tsep = true; + } else if (t) { + list_append(typelist, t); + list_append(res_exps, exp_column(sql->sa, NULL, name, t, CARD_MULTI, 1, 0, 0)); + } else { + GDKfree(types); + throw(SQL, SQLSTATE(42000), "csv" "type %s not found\n", st); + } } else { /* shouldn't be possible, we fallback to strings */ GDKfree(types); - assert(0); + throw(SQL, SQLSTATE(42000), "csv" "type unknown\n"); } } GDKfree(types); @@ -443,6 +455,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c r->sname[0] = 0; r->quote = q; r->delim = d; + r->extra_tsep = extra_tsep; r->has_header = has_header; f->sname = (char*)r; /* pass schema++ */ return MAL_SUCCEED; @@ -467,7 +480,7 @@ csv_load(void *BE, sql_subfunc *f, char sql_subtype *tp = tn->data; sql_column *c = NULL; - if (mvc_create_column(&c, be->mvc, t, name, tp) != LOG_OK) { + if (!tp || mvc_create_column(&c, be->mvc, t, name, tp) != LOG_OK) { //throw(SQL, SQLSTATE(42000), "csv" RUNTIME_LOAD_ERROR); return NULL; } @@ -480,15 +493,23 @@ csv_load(void *BE, sql_subfunc *f, char sql_subtype tpe; sql_find_subtype(&tpe, "varchar", 0, 0); - char tsep[2], ssep[2]; + char tsep[2], rsep[3], ssep[2]; tsep[0] = r->delim; tsep[1] = 0; ssep[0] = r->quote; ssep[1] = 0; + if (r->extra_tsep) { + rsep[0] = r->delim; + rsep[1] = '\n'; + rsep[2] = 0; + } else { + rsep[0] = '\n'; + rsep[1] = 0; + } list *args = append( append( append( append( append( new_exp_list(sql->sa), exp_atom_ptr(sql->sa, t)), exp_atom_str(sql->sa, tsep, &tpe)), - exp_atom_str(sql->sa, "\n", &tpe)), + exp_atom_str(sql->sa, rsep, &tpe)), exp_atom_str(sql->sa, ssep, &tpe)), exp_atom_str(sql->sa, "", &tpe)); diff --git a/sql/server/rel_select.c b/sql/server/rel_select.c --- a/sql/server/rel_select.c 
+++ b/sql/server/rel_select.c @@ -563,7 +563,9 @@ file_loader_add_table_column_types(mvc * *d = 0; fl = fl_find(ext); } - if (!fl) + if (!fl) /* fallback */ + fl = fl_find("csv"); + if (!fl) /* not expected */ return sa_message(sql->ta, "Filename extension '%s' missing", ext?ext:""); } str err = fl->add_types(sql, f, filename, res_exps, tname); diff --git a/sql/test/file_loader/Tests/file_loader_function.test b/sql/test/file_loader/Tests/file_loader_function.test --- a/sql/test/file_loader/Tests/file_loader_function.test +++ b/sql/test/file_loader/Tests/file_loader_function.test @@ -7,7 +7,7 @@ select * from file_loader('FileNotFound' statement error 42000!SELECT: file_loader function failed 'File not found' select * from file_loader('/tmp/FileNotFound.csv') -statement error 42000!SELECT: file_loader function failed 'Filename extension 'gz' missing' +statement error 42000!SELECT: file_loader function failed 'File not found' select * from file_loader('/tmp/FileNotFound.gz') statement error 42000!SELECT: file_loader function failed 'File not found' diff --git a/sql/test/file_loader/Tests/file_loader_string.test b/sql/test/file_loader/Tests/file_loader_string.test --- a/sql/test/file_loader/Tests/file_loader_string.test +++ b/sql/test/file_loader/Tests/file_loader_string.test @@ -10,7 +10,7 @@ select * from '/tmp/FileNotFound' statement error 42000!SELECT: file_loader function failed 'File not found' select * from '/tmp/FileNotFound.csv' -statement error 42000!SELECT: file_loader function failed 'Filename extension 'gz' missing' +statement error 42000!SELECT: file_loader function failed 'File not found' select * from '/tmp/FileNotFound.gz' statement error 42000!SELECT: file_loader function failed 'File not found' _______________________________________________ checkin-list mailing list -- checkin-list@monetdb.org To unsubscribe send an email to checkin-list-le...@monetdb.org