Changeset: d0a155c796b6 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/d0a155c796b6
Modified Files:
        clients/odbc/tests/odbcconnect.c
Branch: Aug2024
Log Message:

Make odbcconnect handle full utf-16, not just ascii-16


diffs (90 lines):

diff --git a/clients/odbc/tests/odbcconnect.c b/clients/odbc/tests/odbcconnect.c
--- a/clients/odbc/tests/odbcconnect.c
+++ b/clients/odbc/tests/odbcconnect.c
@@ -338,7 +338,7 @@ do_sqlbrowseconnect(const char *target)
 {
        void *target_buf;
        SQLSMALLINT target_len = SQL_NTS;
-       make_arg(false, target, &target_buf, &target_len);
+       make_arg(use_wide, target, &target_buf, &target_len);
 
 
        SQLSMALLINT n;
@@ -462,9 +462,13 @@ do_execute_stmt(void)
                        for (int i = 1; i <= colcount; i++) {
                                SQLLEN n;
                                outabuf[0] = '\0';
-                               SQLRETURN ret = SQLGetData(stmt, i, SQL_C_CHAR, outabuf, OUTBUF_SIZE, &n);
+                               SQLRETURN ret = use_wide
+                                       ? SQLGetData(stmt, i, SQL_C_WCHAR, outwbuf, OUTBUF_SIZE * sizeof(SQLWCHAR), &n)
+                                       : SQLGetData(stmt, i, SQL_C_CHAR, outabuf, OUTBUF_SIZE, &n);
                                if (!SQL_SUCCEEDED(ret))
                                        ensure_ok(SQL_HANDLE_STMT, stmt, "SQLGetData", ret);
+                               if (use_wide)
+                                       convert_outw_outa(n);
                                printf("%s;", outabuf);
                        }
                        printf("\n");
@@ -584,17 +588,54 @@ gen_utf16(SQLWCHAR *dest, const char *sr
        return p;
 }
 
+static inline SQLCHAR
+continuation_byte(uint32_t val, int n)
+{
+       val >>= 6 * n; // chop off right hand bits
+       val &= 0x3F;   // chop off left hand bits
+       val |= 0x80;   // add continuation marker bit
+       return val;
+}
+
 static void
 convert_outw_outa(size_t n)
 {
-       // outw mostly holds connection strings and those are mostly ascii
-       for (size_t i = 0; i < n; i++) {
-               SQLWCHAR w = outwbuf[i];
-               if (w > 127) {
-                       fprintf(stderr, "Sorry, this test is lazy and should be extended to non-ascii utf-16\n");
-                       exit(1);
+       SQLWCHAR *end = &outwbuf[n];
+       SQLWCHAR *in = &outwbuf[0];
+       SQLCHAR *out = &outabuf[0];
+
+       while (in < end) {
+               SQLWCHAR w = *in++;
+               uint32_t codepoint;
+               if (w < 0xD800 || w >= 0xE000) {
+                       codepoint = w;
+               } else if (w < 0xDC00 && in < end && *in >= 0xDC00 && *in < 0xE000) {
+                       uint32_t hi = w - 0xD800;
+                       uint32_t lo = *in++ - 0xDC00;
+                       codepoint = 0x10000 + (hi << 10) + lo;
+               } else {
+                       strcpy((char*)out, "!!INVALID UTF-16 OR A BUG IN THE TEST ITSELF!!");
+                       break;
                }
-               outabuf[i] = (SQLCHAR)w;
+               if (codepoint == 0xFEFF && out == &outabuf[0]) {
+                       // skip the BOM
+               } else if (codepoint < 0x80) {
+                       *out++ = codepoint;
+               } else if (codepoint < 0x800) {
+                       *out++ = 0xC0 | (codepoint >> 6);
+                       *out++ = continuation_byte(codepoint, 0);
+               } else if (codepoint < 0x10000) {
+                       *out++ = 0xE0 | (codepoint >> 12);
+                       *out++ = continuation_byte(codepoint, 1);
+                       *out++ = continuation_byte(codepoint, 0);
+               } else {
+                       assert(codepoint < 0x110000);
+                       *out++ = 0xF0 | (codepoint >> 18);
+                       *out++ = continuation_byte(codepoint, 2);
+                       *out++ = continuation_byte(codepoint, 1);
+                       *out++ = continuation_byte(codepoint, 0);
+               }
        }
-       outabuf[n] = '\0';
+
+       *out = '\0';
 }
_______________________________________________
checkin-list mailing list -- checkin-list@monetdb.org
To unsubscribe send an email to checkin-list-le...@monetdb.org

Reply via email to