Changeset: 28f30a355975 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/28f30a355975
Branch: odbc_loader
Log Message:
merge with default

diffs (194 lines):

diff --git a/sql/backends/monet5/vaults/csv/csv.c b/sql/backends/monet5/vaults/csv/csv.c
--- a/sql/backends/monet5/vaults/csv/csv.c
+++ b/sql/backends/monet5/vaults/csv/csv.c
@@ -37,15 +37,17 @@ csv_open_file(char* filename)
 static const char *
 next_delim(const char *s, const char *e, char delim, char quote)
 {
-	bool inquote = false;
-	for(; s < e; s++) {
-		if (*s == quote)
-			inquote = !inquote;
-		else if (!inquote && *s == delim)
+	if (s && e) {
+		bool inquote = false;
+		for(; s < e; s++) {
+			if (*s == quote)
+				inquote = !inquote;
+			else if (!inquote && *s == delim)
+				return s;
+		}
+		if (s <= e)
 			return s;
 	}
-	if (s <= e)
-		return s;
 	return NULL;
 }
 
@@ -242,6 +244,7 @@ detect_time(const char *s, const char *e
 static bool
 detect_date(const char *s, const char *e)
 {
+	/* TODO detect negative years */
 	if ((e-s) != 10)
 		return false;
 	/* YYYY-MM-DD */
@@ -256,6 +259,7 @@ detect_date(const char *s, const char *e
 static bool
 detect_timestamp(const char *s, const char *e)
 {
+	/* TODO detect negative years */
 	if ((e-s) != 16)
 		return false;
 	/* DATE TIME */
@@ -276,24 +280,26 @@ detect_types_row(const char *s, const ch
 		int scale = 0;
 
 		types[i].type = CSV_STRING;
-		if (n) {
+		types[i].scale = 0;
+		if (n && s) {
 			if (detect_null(s,n))
 				types[i].type = CSV_NULL;
 			else if (detect_bool(s,n))
 				types[i].type = CSV_BOOLEAN;
 			else if (detect_bigint(s, n))
 				types[i].type = CSV_BIGINT;
-			else if (detect_decimal(s, n, &scale))
+			else if (detect_decimal(s, n, &scale)) {
 				types[i].type = CSV_DECIMAL;
+				types[i].scale = scale;
+			}
 			else if (detect_time(s, n))
 				types[i].type = CSV_TIME;
 			else if (detect_date(s, n))
 				types[i].type = CSV_DATE;
 			else if (detect_timestamp(s, n))
 				types[i].type = CSV_TIMESTAMP;
-			types[i].scale = scale;
+			s = n+1;
 		}
-		s = n+1;
 	}
 	return types;
 }
@@ -310,7 +316,7 @@ detect_types(const char *buf, char delim
 		if (!e)
 			break;
 
-		csv_type *ntypes = detect_types_row( cur, e, delim, quote, nr_fields);
+		csv_type *ntypes = detect_types_row(cur, e, delim, quote, nr_fields);
 		if (!ntypes)
 			return NULL;
 		cur = e+1;
@@ -397,9 +403,7 @@ static str
 csv_relation(mvc *sql, sql_subfunc *f, char *filename, list *res_exps, char *tname)
 {
 	stream *file = csv_open_file(filename);
-	char buf[8196+1];
-
-	if(file == NULL)
+	if (file == NULL)
 		return RUNTIME_FILE_NOT_FOUND;
 
 	/*
@@ -407,6 +411,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c
 	 * detect types
 	 * detect header
 	 */
+	char buf[8196+1];
 	ssize_t l = mnstr_read(file, buf, 1, 8196);
 	mnstr_close(file);
 	mnstr_destroy(file);
@@ -424,7 +429,7 @@ csv_relation(mvc *sql, sql_subfunc *f, c
 
 	f->tname = tname;
 
-	const char *p = buf, *ep = strchr(p, '\n');;
+	const char *p = buf, *ep = strchr(p, '\n');
 	list *typelist = sa_list(sql->sa);
 	list *nameslist = sa_list(sql->sa);
 	for(int col = 0; col < nr_fields; col++) {
diff --git a/sql/test/file_loader/Tests/cars_empty_line.csv b/sql/test/file_loader/Tests/cars_empty_line.csv
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/cars_empty_line.csv
@@ -0,0 +1,6 @@
+2000,"Ford","Focus",1994
+2001,"Honda",,
+
+2004,"Tesla","S3XY",2019
+2014,"Lightyear","0",2022
+
diff --git a/sql/test/file_loader/Tests/cars_missing_separators.csv b/sql/test/file_loader/Tests/cars_missing_separators.csv
new file mode 100644
--- /dev/null
+++ b/sql/test/file_loader/Tests/cars_missing_separators.csv
@@ -0,0 +1,5 @@
+2000,"Ford","Focus",1994
+2001,"Honda",,
+2004,"Tesla","S3XY",2019
+2014,"Lightyear","0",2022
+2020,"Volvo"
diff --git a/sql/test/file_loader/Tests/file_loader_field_separator.test.in b/sql/test/file_loader/Tests/file_loader_field_separator.test.in
--- a/sql/test/file_loader/Tests/file_loader_field_separator.test.in
+++ b/sql/test/file_loader/Tests/file_loader_field_separator.test.in
@@ -132,3 +132,57 @@ Lightyear
 0
 2022
 
+# tests to load incomplete cars (4 columns, 5 rows) data files with missing field separators or empty lines
+
+query ITTI nosort
+select * from file_loader(r'$TSTSRCDIR/cars_missing_separators.csv') as cars(id, brand, model, "year")
+----
+2000
+Ford
+Focus
+1994
+2001
+Honda
+NULL
+NULL
+2004
+Tesla
+S3XY
+2019
+2014
+Lightyear
+0
+2022
+2020
+Volvo
+NULL
+NULL
+
+query ITTI nosort
+select * from file_loader(r'$TSTSRCDIR/cars_empty_line.csv') as cars(id, brand, model, "year")
+----
+2000
+Ford
+Focus
+1994
+2001
+Honda
+NULL
+NULL
+NULL
+NULL
+NULL
+NULL
+2004
+Tesla
+S3XY
+2019
+2014
+Lightyear
+0
+2022
+NULL
+NULL
+NULL
+NULL
+
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org