Ok, I figured out a fix (attached). write_symbol_string was attempting to use some kind of buffering similar to writestr_stream in file.d, which buffering I replaced with calls to ecl_write_char.
This simplified the code, and does not seem to have had a noticeable negative effect on the performance of printing symbols: --8<---------------cut here---------------start------------->8--- > (defvar j (read-from-string "абракадабра")) J > (time (dotimes (i 1000000) (with-output-to-string (x1) (print j x1)))) --8<---------------cut here---------------end--------------->8--- Before: real time : 5.349 secs run time : 6.827 secs gc count : 552 times consed : 1055997968 bytes NIL After: real time : 5.278 secs run time : 7.756 secs gc count : 257 times consed : 1039999200 bytes NIL -- Vladimir Sedach Software engineering services in Los Angeles https://oneofus.la
>From d491ecdd771404859869070cf6b9480b88aa005a Mon Sep 17 00:00:00 2001 From: Vladimir Sedach <v...@oneofus.la> Date: Mon, 4 May 2020 19:32:50 -0700 Subject: [PATCH] write_symbol.d: Fix printing of Unicode symbol names --- CHANGELOG | 1 + src/c/printer/write_symbol.d | 26 ++++++-------------------- src/tests/normal-tests/unicode.lsp | 4 ++-- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a8049c07..3e4f7cb3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -29,6 +29,7 @@ * Pending changes since 20.4.24 ** Issues fixed +- Printing Unicode symbol names - Compiling Unicode symbol names with :INVERT READTABLE-CASE - Pathname functions :CASE :COMMON argument with Unicode pathnames * 20.4.24 changes since 16.1.3 diff --git a/src/c/printer/write_symbol.d b/src/c/printer/write_symbol.d index ab4c05e0..9ac7449d 100644 --- a/src/c/printer/write_symbol.d +++ b/src/c/printer/write_symbol.d @@ -100,37 +100,26 @@ needs_to_be_escaped(cl_object s, cl_object readtable, cl_object print_case) return 0; } -static inline void -buffer_write_char(char c, cl_object buffer, cl_object stream, cl_index *buffer_ndx, cl_index buffer_size) { - ecl_char_set(buffer, (*buffer_ndx)++, c); - if (*buffer_ndx >= buffer_size) { - si_fill_pointer_set(buffer, ecl_make_fixnum(buffer_size)); - si_do_write_sequence(buffer, stream, ecl_make_fixnum(0), ECL_NIL); - *buffer_ndx = 0; - } -} - static void write_symbol_string(cl_object s, int action, cl_object print_case, cl_object stream, bool escape) { cl_index i; bool capitalize; + if (action == ecl_case_invert) { if (ecl_string_case(s) == 0) action = ecl_case_preserve; } - cl_object buffer = si_get_buffer_string(); - cl_index buffer_size = ecl_fixnum(cl_array_total_size(buffer)); - cl_index buffer_ndx = 0; + if (escape) - buffer_write_char('|', buffer, stream, &buffer_ndx, buffer_size); + ecl_write_char('|', stream); capitalize = 1; for (i = 0; i < s->base_string.fillp; i++) { ecl_character c = ecl_char(s, i); if (escape) { if (c == '|' || c == '\\') { - buffer_write_char('\\', buffer, stream, &buffer_ndx, buffer_size); + ecl_write_char('\\', stream); } } else if (action != ecl_case_preserve) { if (ecl_upper_case_p(c)) { @@ -155,13 +144,10 @@ write_symbol_string(cl_object s, int action, cl_object print_case, capitalize = !ecl_alphanumericp(c); } } - buffer_write_char(c, buffer, stream, &buffer_ndx, buffer_size); + ecl_write_char(c, stream); } if (escape) - buffer_write_char('|', buffer, stream, &buffer_ndx, buffer_size); - si_fill_pointer_set(buffer, ecl_make_fixnum(buffer_ndx)); - si_do_write_sequence(buffer, stream, ecl_make_fixnum(0), ECL_NIL); - si_put_buffer_string(buffer); + ecl_write_char('|', stream); } static bool diff --git a/src/tests/normal-tests/unicode.lsp b/src/tests/normal-tests/unicode.lsp index 6a4bc887..e0f9f476 100644 --- a/src/tests/normal-tests/unicode.lsp +++ b/src/tests/normal-tests/unicode.lsp @@ -43,7 +43,7 @@ ;;; Date: 2020-05-03 ;;; From: Vladimir Sedach <v...@oneofus.la> -;;; Fixed: 2020-05-03 (Vladimir Sedach) +;;; Fixed: 2020-05-04 (Vladimir Sedach) ;;; Description: ;;; ;;; Symbol names that contain Unicode are not printed correctly @@ -55,7 +55,7 @@ ;;; Date: 2020-05-03 ;;; From: Vladimir Sedach <v...@oneofus.la> -;;; Fixed: 2020-05-03 (Vladimir Sedach) +;;; Fixed: 2020-05-04 (Vladimir Sedach) ;;; Description: ;;; ;;; unicode.0003.print-unicode-symbols shows up as a compiler -- 2.20.1