Changeset: 1dd9667d52aa for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1dd9667d52aa
Added Files:
        sql/backends/monet5/sql_bincopyconvert.c
        sql/backends/monet5/sql_bincopyconvert.h
Modified Files:
        sql/backends/monet5/CMakeLists.txt
        sql/backends/monet5/sql_bincopyfrom.c
Branch: copyintobinary
Log Message:

Move bincopy conversion related code to separate file


diffs (truncated from 810 to 300 lines):

diff --git a/sql/backends/monet5/CMakeLists.txt b/sql/backends/monet5/CMakeLists.txt
--- a/sql/backends/monet5/CMakeLists.txt
+++ b/sql/backends/monet5/CMakeLists.txt
@@ -144,6 +144,7 @@ target_sources(sql
   sql_strimps.c sql_strimps.h
   sql_time.c
   sql_bincopyfrom.c
+  sql_bincopyconvert.c sql_bincopyconvert.h
   sql_datetrunc.c
   sql_rank.c sql_rank.h
   sql_subquery.c sql_subquery.h
diff --git a/sql/backends/monet5/sql_bincopyconvert.c b/sql/backends/monet5/sql_bincopyconvert.c
new file mode 100644
--- /dev/null
+++ b/sql/backends/monet5/sql_bincopyconvert.c
@@ -0,0 +1,340 @@
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.  If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * Copyright 1997 - July 2008 CWI, August 2008 - 2022 MonetDB B.V.
+ */
+
+#include "monetdb_config.h"
+
+#include "copybinary.h"
+#include "copybinary_support.h"
+#include "sql_bincopyconvert.h"
+#include "sql.h"
+#include "gdk.h"
+#include "mal_backend.h"
+#include "mal_interpreter.h"
+
+// No-op conversion hook for bte columns: nothing is swapped or checked,
+// so all three arguments are ignored and the call always succeeds.
+static str
+convert_bte(void *start, void *end, bool byteswap)
+{
+       (void)byteswap;
+       (void)end;
+       (void)start;
+       return MAL_SUCCEED;
+}
+
+// Check a range of boolean bytes: every byte in [start, end) must be 0 or 1.
+// Nothing is rewritten and byteswap is ignored.
+// Returns MAL_SUCCEED, or an exception that reports the first byte value
+// greater than 1.
+static str
+convert_bit(void *start, void *end, bool byteswap)
+{
+       (void)byteswap;
+       const unsigned char *limit = end;
+       for (const unsigned char *cur = start; cur < limit; cur++) {
+               int b = *cur;
+               if (b > 1)
+                       throw(SQL, "convert_bit", SQLSTATE(22003)
+                             "invalid boolean byte value: %d", b);
+       }
+       return MAL_SUCCEED;
+}
+
+// Byte-swap every sht in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+static str
+convert_sht(void *start, void *end, bool byteswap)
+{
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       sht *limit = end;
+       for (sht *cur = start; cur < limit; cur++) {
+               copy_binary_convert16(cur);
+       }
+       return MAL_SUCCEED;
+}
+
+// Byte-swap every int in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+static str
+convert_int(void *start, void *end, bool byteswap)
+{
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       int *limit = end;
+       for (int *cur = start; cur < limit; cur++) {
+               copy_binary_convert32(cur);
+       }
+       return MAL_SUCCEED;
+}
+
+// Byte-swap every lng in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+static str
+convert_lng(void *start, void *end, bool byteswap)
+{
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       lng *limit = end;
+       for (lng *cur = start; cur < limit; cur++) {
+               copy_binary_convert64(cur);
+       }
+       return MAL_SUCCEED;
+}
+
+#ifdef HAVE_HGE
+// Byte-swap every hge in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+// Only compiled when HAVE_HGE is defined.
+static str
+convert_hge(void *start, void *end, bool byteswap)
+{
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       hge *limit = end;
+       for (hge *cur = start; cur < limit; cur++) {
+               copy_binary_convert128(cur);
+       }
+       return MAL_SUCCEED;
+}
+#endif
+
+// Sanity check for uuid data: nothing is converted and byteswap is ignored.
+// When assertions are enabled, the byte length of [start, end) must be a
+// multiple of 16. Always succeeds.
+static str
+convert_uuid(void *start, void *end, bool byteswap)
+{
+       (void)byteswap;
+
+       const char *first = start;
+       const char *last = end;
+       size_t nbytes = last - first;
+       (void)nbytes; // only read by the assert below
+       assert(nbytes % 16 == 0);
+
+       return MAL_SUCCEED;
+}
+
+// Byte-swap every flt in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+static str
+convert_flt(void *start, void *end, bool byteswap)
+{
+       // The values are swapped through a uint32_t pointer, which is a
+       // slightly dodgy pointer conversion: check that the size matches
+       // and that uint32_t does not need more padding in a struct than flt.
+       assert(sizeof(uint32_t) == sizeof(flt));
+       assert(sizeof(struct { char dummy; uint32_t ui; })
+              >= sizeof(struct { char dummy; flt f; }));
+
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       for (uint32_t *cur = start; (void*)cur < end; cur++) {
+               copy_binary_convert32(cur);
+       }
+       return MAL_SUCCEED;
+}
+
+// Byte-swap every dbl in [start, end) in place when byteswap is set;
+// otherwise leave the data untouched. Always succeeds.
+static str
+convert_dbl(void *start, void *end, bool byteswap)
+{
+       // The values are swapped through a uint64_t pointer, which is a
+       // slightly dodgy pointer conversion: check that the size matches
+       // and that uint64_t does not need more padding in a struct than dbl.
+       assert(sizeof(uint64_t) == sizeof(dbl));
+       assert(sizeof(struct { char dummy; uint64_t ui; })
+              >= sizeof(struct { char dummy; dbl f; }));
+
+       if (!byteswap)
+               return MAL_SUCCEED;
+
+       for (uint64_t *cur = start; (void*)cur < end; cur++) {
+               copy_binary_convert64(cur);
+       }
+       return MAL_SUCCEED;
+}
+
+
+// Turn the copy_binary_date records in [src_start, src_end) into date
+// values stored consecutively from dst_start.
+// When byteswap is set, each source record is first passed to
+// copy_binary_convert_date(). The destination range must hold exactly as
+// many items as the source range (asserted). Always succeeds.
+static str
+convert_date(void *dst_start, void *dst_end, void *src_start, void *src_end,
+             bool byteswap)
+{
+       copy_binary_date *in = src_start;
+       copy_binary_date *in_end = src_end;
+       date *out = dst_start;
+       date *out_end = dst_end;
+       (void)out_end; // only read by the assert below
+       assert(out_end - out == in_end - in);
+
+       while (in < in_end) {
+               if (byteswap)
+                       copy_binary_convert_date(in);
+               *out++ = date_create(in->year, in->month, in->day);
+               in++;
+       }
+
+       return MAL_SUCCEED;
+}
+
+// Turn the copy_binary_time records in [src_start, src_end) into daytime
+// values stored consecutively from dst_start.
+// When byteswap is set, each source record is first passed to
+// copy_binary_convert_time(). The destination range must hold exactly as
+// many items as the source range (asserted). Always succeeds.
+static str
+convert_time(void *dst_start, void *dst_end, void *src_start, void *src_end,
+             bool byteswap)
+{
+       // byteswap is read in the loop below, so it must not be marked as
+       // unused with a (void) cast.
+       daytime *dst = (daytime*)dst_start;
+       daytime *dst_e = (daytime*)dst_end;
+       copy_binary_time *src = (copy_binary_time*)src_start;
+       copy_binary_time *src_e = (copy_binary_time*)src_end;
+       (void)dst_e; // only read by the assert below
+       assert(dst_e - dst == src_e - src);
+
+       for (; src < src_e; src++) {
+               if (byteswap)
+                       copy_binary_convert_time(src);
+               daytime value = daytime_create(src->hours, src->minutes,
+                                              src->seconds, src->ms);
+               *dst++ = value;
+       }
+
+       return MAL_SUCCEED;
+}
+
+// Turn the copy_binary_timestamp records in [src_start, src_end) into
+// timestamp values stored consecutively from dst_start.
+// When byteswap is set, each source record is first passed to
+// copy_binary_convert_timestamp(). Each timestamp is built from the
+// record's date part and time part. The destination range must hold exactly
+// as many items as the source range (asserted). Always succeeds.
+static str
+convert_timestamp(void *dst_start, void *dst_end, void *src_start,
+                  void *src_end, bool byteswap)
+{
+       // byteswap is read in the loop below, so it must not be marked as
+       // unused with a (void) cast.
+       timestamp *dst = (timestamp*)dst_start;
+       timestamp *dst_e = (timestamp*)dst_end;
+       copy_binary_timestamp *src = (copy_binary_timestamp*)src_start;
+       copy_binary_timestamp *src_e = (copy_binary_timestamp*)src_end;
+       (void)dst_e; // only read by the assert below
+       assert(dst_e - dst == src_e - src);
+
+       for (; src < src_e; src++) {
+               if (byteswap)
+                       copy_binary_convert_timestamp(src);
+               date dt = date_create(src->date.year, src->date.month,
+                                     src->date.day);
+               daytime tm = daytime_create(src->time.hours, src->time.minutes,
+                                           src->time.seconds, src->time.ms);
+               timestamp value = timestamp_create(dt, tm);
+               *dst++ = value;
+       }
+
+       return MAL_SUCCEED;
+}
+
+
+// Normalize and check one NUL-terminated string in place.
+//
+// Every "\r\n" pair is rewritten to "\n"; the text can only shrink, so the
+// result always fits in the original buffer. At the same time the bytes are
+// checked for utf-8 structure: each lead byte must be followed by the right
+// number of continuation bytes (10xx_xxxx). Overlong forms and code point
+// ranges are not checked.
+//
+// The two-byte string { 0x80, 0 } is accepted unchanged, see below.
+//
+// Returns MAL_SUCCEED, or an exception when the byte sequence is malformed;
+// in that case the buffer may already have been partially rewritten.
+static str
+convert_and_validate(char *text)
+{
+       unsigned char *r = (unsigned char*)text;
+       unsigned char *w = r;
+
+       if (*r == 0x80 && *(r+1) == 0) {
+               // Technically a utf-8 violation, but we treat it as the
+               // NULL marker. GDK does so as well so we can just pass it on.
+               // load_zero_terminated_text() below contains an assert to
+               // ensure this remains the case.
+               return MAL_SUCCEED;
+       }
+
+       while (*r != 0) {
+               unsigned char c = *w++ = *r++;
+
+               if (c == '\r' && *r == '\n') {
+                       // Drop the \r; the \n is copied by the next iteration.
+                       w--;
+                       continue;
+               }
+               if ((c & 0x80) == 0x00) // 0xxx_xxxx: standalone byte
+                       continue;
+               if ((c & 0xF8) == 0xF0) // 1111_0xxx: three more bytes follow
+                       goto expect3;
+               if ((c & 0xF0) == 0xE0) // 1110_xxxx: two more bytes follow
+                       goto expect2;
+               if ((c & 0xE0) == 0xC0) // 110x_xxxx: one more byte follows
+                       goto expect1;
+               goto bad_utf8;
+
+               // A continuation byte is exactly 10xx_xxxx. Testing only the
+               // top bit would also let lead bytes (11xx_xxxx) through.
+               // The terminating NUL fails the test too, so a truncated
+               // sequence is rejected without reading past the terminator.
+expect3:
+               if (((*w++ = *r++) & 0xC0) != 0x80)
+                       goto bad_utf8;
+expect2:
+               if (((*w++ = *r++) & 0xC0) != 0x80)
+                       goto bad_utf8;
+expect1:
+               if (((*w++ = *r++) & 0xC0) != 0x80)
+                       goto bad_utf8;
+
+       }
+       *w = '\0';
+       return MAL_SUCCEED;
+
+bad_utf8:
+       return createException(SQL, "BATattach_stream", SQLSTATE(42000)
+                              "malformed utf-8 byte sequence");
+}
+
+// Run convert_and_validate() on the string at start and, if that succeeds,
+// append the string to bat with BUNappend.
+// Returns MAL_SUCCEED, the error from convert_and_validate(), or a new
+// exception when BUNappend does not return GDK_SUCCEED.
+static str
+append_text(BAT *bat, char *start)
+{
+       str msg = convert_and_validate(start);
+
+       if (msg == MAL_SUCCEED && BUNappend(bat, start, false) != GDK_SUCCEED)
+               msg = createException(SQL, "sql.importColumn", GDK_EXCEPTION);
+
+       return msg;
+}
+
+// Load items from the stream and put them in the BAT.
+// Because it's text read from a binary stream, we replace \r\n with \n.
+// The utf-8 structure is validated by convert_and_validate(), which
+// append_text() calls on every string before it is appended.
+static str
+load_zero_terminated_text(BAT *bat, stream *s, int *eof_reached)
+{
+       str msg = MAL_SUCCEED;
+       bstream *bs = NULL;
+
+       // convert_and_validate() above counts on the following property to 
hold:
+       assert(strNil((const char[2]){ 0x80, 0 }));
+
+       bs = bstream_create(s, 1 << 20);
+       if (bs == NULL) {
+               msg = createException(SQL, "sql", SQLSTATE(HY013) 
MAL_MALLOC_FAIL);
+               goto end;
+       }
+
+       // In the outer loop we refill the buffer until the stream ends.
+       // In the inner loop we look for complete \0-terminated strings.
+       while (1) {
+               ssize_t nread = bstream_next(bs);
+               if (nread < 0)
+                       bailout("%s", mnstr_peek_error(s));
+               if (nread == 0)
+                       break;
+
+               char *buf_start = &bs->buf[bs->pos];
+               char *buf_end = &bs->buf[bs->len];
+               char *start, *end;
+               for (start = buf_start; (end = memchr(start, '\0', buf_end - 
start)) != NULL; start = end + 1) {
+                       msg = append_text(bat, start);
+                       if (msg != NULL)
+                               goto end;
+               }
+               bs->pos = start - buf_start;
+       }
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to