Changeset: 05ea89e56eb8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=05ea89e56eb8
Modified Files:
        gdk/ChangeLog
        gdk/gdk_utils.c
Branch: default
Log Message:

Store non-UTF-8 environment values in separate list, store escaped in env.


diffs (175 lines):

diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,6 +1,13 @@
 # ChangeLog file for GDK
 # This file is updated with Maddlog
 
+* Fri Jan 29 2021 Sjoerd Mullender <sjo...@acm.org>
+- Environment variables (sys.env()) must be UTF-8, but since they can
+  contain file names which may not be UTF-8, there is now a mechanism
+  to store the original values outside of sys.env() and store
+  %-escaped (similar to URL escaping) values in the environment.  The
+  key must still be UTF-8.
+
 * Tue Dec  1 2020 Sjoerd Mullender <sjo...@acm.org>
 - We now save the location of the min and max values when known.
 
diff --git a/gdk/gdk_utils.c b/gdk/gdk_utils.c
--- a/gdk/gdk_utils.c
+++ b/gdk/gdk_utils.c
@@ -94,9 +94,24 @@ GDKenvironment(const char *dbpath)
        return true;
 }
 
+static struct orig_value {     /* list node keeping the original (unescaped) value that GDKsetenv found not to be UTF-8 */
+       struct orig_value *next;        /* next node in the singly linked list, NULL at the end */
+       char *value;    /* original value; GDKsetenv points this into the key[] storage, just past the name's NUL */
+       char key[];     /* variable name, followed in the same allocation by the value string */
+} *orig_value; /* head of the list; GDKsetenv inserts new nodes at the front */
+static MT_Lock GDKenvlock = MT_LOCK_INITIALIZER(GDKenvlock);   /* held while the orig_value list is traversed or modified */
+
 const char *
 GDKgetenv(const char *name)
 {
+       MT_lock_set(&GDKenvlock);
+       for (struct orig_value *ov = orig_value; ov; ov = ov->next) {
+               if (strcmp(ov->key, name) == 0) {
+                       MT_lock_unset(&GDKenvlock);
+                       return ov->value;
+               }
+       }
+       MT_lock_unset(&GDKenvlock);
        if (GDKkey && GDKval) {
                BUN b = BUNfnd(GDKkey, (ptr) name);
 
@@ -138,13 +153,110 @@ GDKgetenv_int(const char *name, int def)
        return def;
 }
 
+#define ESCAPE_CHAR    '%'
+
+/* Check whether the NUL-terminated string v is well-formed UTF-8 in
+ * the sense of RFC 3629: no overlong encodings, no UTF-16 surrogates
+ * (U+D800..U+DFFF), nothing above U+10FFFF, no stray continuation
+ * bytes, and no multi-byte sequence cut short by the end of the
+ * string.
+ *
+ * Returns true and sets *esclen to 0 if v is well-formed.  Otherwise
+ * returns false and sets *esclen to the number of bytes (including
+ * the terminating NUL) needed to hold v with every byte that has the
+ * high bit set and every ESCAPE_CHAR expanded to three bytes. */
+static bool
+isutf8(const char *v, size_t *esclen)
+{
+       size_t n = 1;           /* escaped size; starts at 1 for the NUL */
+       int nutf8 = 0;          /* continuation bytes still expected */
+       /* allowed range for the next continuation byte; only the
+        * first one after certain lead bytes is narrower than 80..BF */
+       unsigned char lo = 0x80;
+       unsigned char hi = 0xBF;
+       for (size_t i = 0; v[i]; i++) {
+               unsigned char c = (unsigned char) v[i];
+               if (nutf8 > 0) {
+                       if (c < lo || c > hi)
+                               goto badutf8;
+                       lo = 0x80;
+                       hi = 0xBF;
+                       nutf8--;
+               } else if (c < 0x80) {
+                       /* plain ASCII, nothing to check */
+               } else if (c >= 0xC2 && c <= 0xDF) {
+                       /* C0 and C1 would be overlong encodings */
+                       nutf8 = 1;
+               } else if ((c & 0xF0) == 0xE0) {
+                       nutf8 = 2;
+                       if (c == 0xE0)
+                               lo = 0xA0;      /* below is overlong */
+                       else if (c == 0xED)
+                               hi = 0x9F;      /* above is a surrogate */
+               } else if (c >= 0xF0 && c <= 0xF4) {
+                       nutf8 = 3;
+                       if (c == 0xF0)
+                               lo = 0x90;      /* below is overlong */
+                       else if (c == 0xF4)
+                               hi = 0x8F;      /* above is > U+10FFFF */
+               } else {
+                       /* stray continuation byte, C0, C1, or F5..FF */
+                       goto badutf8;
+               }
+       }
+       /* the string must not end in the middle of a sequence */
+       if (nutf8 > 0)
+               goto badutf8;
+       *esclen = 0;
+       return true;
+  badutf8:
+       /* count using the same rule GDKsetenv uses when escaping */
+       for (size_t i = 0; v[i]; i++) {
+               if (v[i] & 0x80 || v[i] == ESCAPE_CHAR)
+                       n += 3;
+               else
+                       n++;
+       }
+       *esclen = n;
+       return false;
+}
+
 gdk_return
 GDKsetenv(const char *name, const char *value)
 {
-       if (BUNappend(GDKkey, name, false) != GDK_SUCCEED ||
-           BUNappend(GDKval, value, false) != GDK_SUCCEED)
-               return GDK_FAIL;
-       return GDK_SUCCEED;
+       static const char hexdigits[] = "0123456789abcdef";
+       char *conval = NULL;
+       size_t esclen = 0;
+       if (!isutf8(value, &esclen)) {
+               size_t j = strlen(name) + 1;
+               struct orig_value *ov = GDKmalloc(offsetof(struct orig_value, 
key) + j + strlen(value) + 1);
+               if (ov == NULL)
+                       return GDK_FAIL;
+               strcpy(ov->key, name);
+               ov->value = ov->key + j;
+               strcpy(ov->value, value);
+               conval = GDKmalloc(esclen);
+               if (conval == NULL) {
+                       GDKfree(ov);
+                       return GDK_FAIL;
+               }
+               j = 0;
+               for (size_t i = 0; value[i]; i++) {
+                       if (value[i] & 0x80 || value[i] == ESCAPE_CHAR) {
+                               conval[j++] = ESCAPE_CHAR;
+                               conval[j++] = hexdigits[(unsigned char) 
value[i] >> 4];
+                               conval[j++] = hexdigits[(unsigned char) 
value[i] & 0xF];
+                       } else {
+                               conval[j++] = value[i];
+                       }
+               }
+               conval[j] = 0;
+               MT_lock_set(&GDKenvlock);
+               ov->next = orig_value;
+               orig_value = ov;
+               /* remove previous value if present (later in list) */
+               for (ov = orig_value; ov->next; ov = ov->next) {
+                       if (strcmp(ov->next->key, name) == 0) {
+                               struct orig_value *ovn = ov->next;
+                               ov->next = ovn->next;
+                               GDKfree(ovn);
+                       }
+               }
+               MT_lock_unset(&GDKenvlock);
+       } else {
+               /* remove previous value if present */
+               MT_lock_set(&GDKenvlock);
+               for (struct orig_value **ovp = &orig_value; *ovp; ovp = 
&(*ovp)->next) {
+                       if (strcmp((*ovp)->key, name) == 0) {
+                               struct orig_value *ov = *ovp;
+                               *ovp = ov->next;
+                               GDKfree(ov);
+                               break;
+                       }
+               }
+               MT_lock_unset(&GDKenvlock);
+       }
+       gdk_return rc = BUNappend(GDKkey, name, false);
+       if (rc == GDK_SUCCEED)
+               rc = BUNappend(GDKval, conval ? conval : value, false);
+       GDKfree(conval);
+       return rc;
 }
 
 gdk_return
@@ -1053,6 +1165,14 @@ GDKreset(int status)
 
        join_detached_threads();
 
+       MT_lock_set(&GDKenvlock);
+       while (orig_value) {
+               struct orig_value *ov = orig_value;
+               orig_value = orig_value->next;
+               GDKfree(ov);
+       }
+       MT_lock_unset(&GDKenvlock);
+
        if (status == 0) {
                /* they had their chance, now kill them */
                bool killed = false;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to