Hello, Attached are fixes for ecl_string_case, which does not handle Unicode correctly. This shows up in two places in the CL run-time.
Currently the READTABLE-CASE :INVERT issue is preventing Parenscript 2.8 from compiling. -- Vladimir Sedach Software engineering services in Los Angeles https://oneofus.la
>From b4de3e816d7396d60a6ac330a7aed4bc7a90d888 Mon Sep 17 00:00:00 2001 From: Vladimir Sedach <v...@oneofus.la> Date: Sun, 3 May 2020 17:46:59 -0700 Subject: [PATCH 1/2] tests: Add Unicode tests for two problems in internal case handling --- src/tests/ecl-tests.asd | 1 + src/tests/ecl-tests.lisp | 2 +- src/tests/normal-tests/unicode.lsp | 42 ++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 src/tests/normal-tests/unicode.lsp diff --git a/src/tests/ecl-tests.asd b/src/tests/ecl-tests.asd index fa5ec763..a82c9992 100644 --- a/src/tests/ecl-tests.asd +++ b/src/tests/ecl-tests.asd @@ -25,6 +25,7 @@ (:file "package-extensions") (:file "hash-tables") (:file "external-formats" :if-feature :unicode) + (:file "unicode" :if-feature :unicode) (:file "complex"))) (:module stress-tests :default-component-class asdf:cl-source-file.lsp diff --git a/src/tests/ecl-tests.lisp b/src/tests/ecl-tests.lisp index 66594b46..55656e43 100644 --- a/src/tests/ecl-tests.lisp +++ b/src/tests/ecl-tests.lisp @@ -22,7 +22,7 @@ ;;;; Declare the suites (suite 'make-check '(executable ieee-fp eprocess package-ext hash-tables ansi+ mixed - cmp emb ffi mop run-program mp complex)) + cmp emb ffi mop run-program mp complex #+unicode unicode)) (suite 'ecl-tests '(make-check eformat)) diff --git a/src/tests/normal-tests/unicode.lsp b/src/tests/normal-tests/unicode.lsp new file mode 100644 index 00000000..a3297679 --- /dev/null +++ b/src/tests/normal-tests/unicode.lsp @@ -0,0 +1,42 @@ +;;;; -*- encoding:utf-8; Mode: Lisp; Syntax: Common-Lisp; indent-tabs-mode: nil -*- +;;;; vim: set filetype=lisp tabstop=8 shiftwidth=2 expandtab: + +;;;; Author: Vladimir Sedach <v...@oneofus.la> +;;;; Created: 2020-05-03 +;;;; Contains: Unicode handling in the compiler and runtime + +(in-package :cl-test) + +(suite 'unicode) + +;;; Date: 2020-05-03 +;;; From: Vladimir Sedach <v...@oneofus.la> +;;; Fixed: 2020-05-03 (Vladimir Sedach) +;;; Description: +;;; +;;; Compiler does not handle non-ASCII symbols correctly when +;;; READTABLE-CASE is :INVERT +;;; +(test unicode.0001.compiler-unicode-inverted-case + (let ((test-readtable (copy-readtable))) + (dolist (case '(:invert :upcase :downcase :preserve)) + (setf (readtable-case test-readtable) case) + (let ((*readtable* test-readtable)) + (is (= 3 + (funcall + (compile nil + (cons 'lambda + (read-from-string "((๐ ๐) (+ ๐ ๐))"))) + 1 2))))))) + +;;; Date: 2020-05-03 +;;; From: Vladimir Sedach <v...@oneofus.la> +;;; Fixed: 2020-05-03 (Vladimir Sedach) +;;; Description: +;;; +;;; Pathname :common case conversion fails on Unicode pathnames +;;; +(test unicode.0002.pathname-common-unicode + (is (equal + "ะะะะะซะ" + (pathname-name (pathname "/tmp/ะดะฐะฝะฝัะต.txt") :case :common)))) -- 2.20.1
>From f261f9bb71d1bead6639de9e4a33b6d6916b33e1 Mon Sep 17 00:00:00 2001 From: Vladimir Sedach <v...@oneofus.la> Date: Sun, 3 May 2020 17:56:37 -0700 Subject: [PATCH 2/2] character.d: Fixed ecl_string_case Unicode handling --- CHANGELOG | 3 +++ src/c/character.d | 14 +++++++------- src/c/printer/write_symbol.d | 4 +--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index f6d611b4..a8049c07 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -28,6 +28,9 @@ parameter given to configure script). * Pending changes since 20.4.24 +** Issues fixed +- Compiling Unicode symbol names with :INVERT READTABLE-CASE +- Pathname functions :CASE :COMMON argument with Unicode pathnames * 20.4.24 changes since 16.1.3 ** Announcement Dear Community, diff --git a/src/c/character.d b/src/c/character.d index a69b6e4b..fa0b0bf8 100644 --- a/src/c/character.d +++ b/src/c/character.d @@ -99,23 +99,23 @@ cl_both_case_p(cl_object c) int ecl_string_case(cl_object s) { + /* Returns 1 if string is all uppercase, -1 if all lowercase, and 0 if mixed case */ int upcase; cl_index i; - ecl_base_char *text; - + switch (ecl_t_of(s)) { #ifdef ECL_UNICODE case t_string: - s = si_coerce_to_base_string(s); #endif case t_base_string: - text = (ecl_base_char*)s->base_string.self; - for (i = 0, upcase = 0; i < s->base_string.dim; i++) { - if (ecl_upper_case_p(text[i])) { + for (i = 0, upcase = 0; i < s->base_string.fillp; i++) { + ecl_character c = ecl_char(s, i); + + if (ecl_upper_case_p(c)) { if (upcase < 0) return 0; upcase = +1; - } else if (ecl_lower_case_p(text[i])) { + } else if (ecl_lower_case_p(c)) { if (upcase > 0) return 0; upcase = -1; diff --git a/src/c/printer/write_symbol.d b/src/c/printer/write_symbol.d index a39bab97..ab4c05e0 100644 --- a/src/c/printer/write_symbol.d +++ b/src/c/printer/write_symbol.d @@ -62,8 +62,6 @@ potential_number_p(cl_object s, int base) return some_digit; } -#define needs_to_be_inverted(s) (ecl_string_case(s) != 0) - static bool all_dots(cl_object s) { @@ -119,7 +117,7 @@ write_symbol_string(cl_object s, int action, cl_object print_case, cl_index i; bool capitalize; if (action == ecl_case_invert) { - if (!needs_to_be_inverted(s)) + if (ecl_string_case(s) == 0) action = ecl_case_preserve; } cl_object buffer = si_get_buffer_string(); -- 2.20.1