Hello,

Attached are fixes for ecl_string_case, which does not handle Unicode
correctly. This shows up in two places in the CL run-time.

Currently the READTABLE-CASE :INVERT issue is preventing Parenscript
2.8 from compiling.

--
Vladimir Sedach
Software engineering services in Los Angeles https://oneofus.la

>From b4de3e816d7396d60a6ac330a7aed4bc7a90d888 Mon Sep 17 00:00:00 2001
From: Vladimir Sedach <v...@oneofus.la>
Date: Sun, 3 May 2020 17:46:59 -0700
Subject: [PATCH 1/2] tests: Add Unicode tests for two problems in internal
 case handling

---
 src/tests/ecl-tests.asd            |  1 +
 src/tests/ecl-tests.lisp           |  2 +-
 src/tests/normal-tests/unicode.lsp | 42 ++++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 src/tests/normal-tests/unicode.lsp

diff --git a/src/tests/ecl-tests.asd b/src/tests/ecl-tests.asd
index fa5ec763..a82c9992 100644
--- a/src/tests/ecl-tests.asd
+++ b/src/tests/ecl-tests.asd
@@ -25,6 +25,7 @@
                          (:file "package-extensions")
 			 (:file "hash-tables")
                          (:file "external-formats" :if-feature :unicode)
+                         (:file "unicode" :if-feature :unicode)
                          (:file "complex")))
                (:module stress-tests
                         :default-component-class asdf:cl-source-file.lsp
diff --git a/src/tests/ecl-tests.lisp b/src/tests/ecl-tests.lisp
index 66594b46..55656e43 100644
--- a/src/tests/ecl-tests.lisp
+++ b/src/tests/ecl-tests.lisp
@@ -22,7 +22,7 @@
 ;;;; Declare the suites
 (suite 'make-check
        '(executable ieee-fp eprocess package-ext hash-tables ansi+ mixed
-         cmp emb ffi mop run-program mp complex))
+         cmp emb ffi mop run-program mp complex #+unicode unicode))
 
 (suite 'ecl-tests
        '(make-check eformat))
diff --git a/src/tests/normal-tests/unicode.lsp b/src/tests/normal-tests/unicode.lsp
new file mode 100644
index 00000000..a3297679
--- /dev/null
+++ b/src/tests/normal-tests/unicode.lsp
@@ -0,0 +1,42 @@
+;;;; -*- encoding:utf-8; Mode: Lisp; Syntax: Common-Lisp; indent-tabs-mode: nil -*-
+;;;; vim: set filetype=lisp tabstop=8 shiftwidth=2 expandtab:
+
+;;;; Author:   Vladimir Sedach <v...@oneofus.la>
+;;;; Created:  2020-05-03
+;;;; Contains: Unicode handling in the compiler and runtime
+
+(in-package :cl-test)
+
+(suite 'unicode)
+
+;;; Date: 2020-05-03
+;;; From: Vladimir Sedach <v...@oneofus.la>
+;;; Fixed: 2020-05-03 (Vladimir Sedach)
+;;; Description:
+;;;
+;;;     Compiler does not handle non-ASCII symbols correctly when
+;;;     READTABLE-CASE is :INVERT
+;;;
+(test unicode.0001.compiler-unicode-inverted-case
+  (let ((test-readtable (copy-readtable)))
+    (dolist (case '(:invert :upcase :downcase :preserve))
+      (setf (readtable-case test-readtable) case)
+      (let ((*readtable* test-readtable))
+        (is (= 3
+               (funcall
+                (compile nil
+                         (cons 'lambda
+                               (read-from-string "((๐›… ๐›œ) (+ ๐›… ๐›œ))")))
+                1 2)))))))
+
+;;; Date: 2020-05-03
+;;; From: Vladimir Sedach <v...@oneofus.la>
+;;; Fixed: 2020-05-03 (Vladimir Sedach)
+;;; Description:
+;;;
+;;;     Pathname :common case conversion fails on Unicode pathnames
+;;;
+(test unicode.0002.pathname-common-unicode
+  (is (equal
+       "ะ”ะะะะซะ•"
+       (pathname-name (pathname "/tmp/ะดะฐะฝะฝั‹ะต.txt") :case :common))))
-- 
2.20.1

>From f261f9bb71d1bead6639de9e4a33b6d6916b33e1 Mon Sep 17 00:00:00 2001
From: Vladimir Sedach <v...@oneofus.la>
Date: Sun, 3 May 2020 17:56:37 -0700
Subject: [PATCH 2/2] character.d: Fixed ecl_string_case Unicode handling

---
 CHANGELOG                    |  3 +++
 src/c/character.d            | 14 +++++++-------
 src/c/printer/write_symbol.d |  4 +---
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index f6d611b4..a8049c07 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -28,6 +28,9 @@
     parameter given to configure script).

 * Pending changes since 20.4.24
+** Issues fixed
+- Compiling Unicode symbol names with :INVERT READTABLE-CASE
+- Pathname functions :CASE :COMMON argument with Unicode pathnames
 * 20.4.24 changes since 16.1.3
 ** Announcement
 Dear Community,
diff --git a/src/c/character.d b/src/c/character.d
index a69b6e4b..fa0b0bf8 100644
--- a/src/c/character.d
+++ b/src/c/character.d
@@ -99,23 +99,23 @@ cl_both_case_p(cl_object c)
 int
 ecl_string_case(cl_object s)
 {
+  /* Returns 1 if string is all uppercase, -1 if all lowercase, and 0 if mixed case */
   int upcase;
   cl_index i;
-  ecl_base_char *text;
-
+
   switch (ecl_t_of(s)) {
 #ifdef ECL_UNICODE
   case t_string:
-    s = si_coerce_to_base_string(s);
 #endif
   case t_base_string:
-    text = (ecl_base_char*)s->base_string.self;
-    for (i = 0, upcase = 0; i < s->base_string.dim; i++) {
-      if (ecl_upper_case_p(text[i])) {
+    for (i = 0, upcase = 0; i < s->base_string.fillp; i++) {
+      ecl_character c = ecl_char(s, i);
+
+      if (ecl_upper_case_p(c)) {
         if (upcase < 0)
           return 0;
         upcase = +1;
-      } else if (ecl_lower_case_p(text[i])) {
+      } else if (ecl_lower_case_p(c)) {
         if (upcase > 0)
           return 0;
         upcase = -1;
diff --git a/src/c/printer/write_symbol.d b/src/c/printer/write_symbol.d
index a39bab97..ab4c05e0 100644
--- a/src/c/printer/write_symbol.d
+++ b/src/c/printer/write_symbol.d
@@ -62,8 +62,6 @@ potential_number_p(cl_object s, int base)
   return some_digit;
 }

-#define needs_to_be_inverted(s) (ecl_string_case(s) != 0)
-
 static bool
 all_dots(cl_object s)
 {
@@ -119,7 +117,7 @@ write_symbol_string(cl_object s, int action, cl_object print_case,
   cl_index i;
   bool capitalize;
   if (action == ecl_case_invert) {
-    if (!needs_to_be_inverted(s))
+    if (ecl_string_case(s) == 0)
       action = ecl_case_preserve;
   }
   cl_object buffer = si_get_buffer_string();
--
2.20.1

Reply via email to