Ok, I figured out a fix (attached).

write_symbol_string was attempting to use some kind of buffering
similar to writestr_stream in file.d, which buffering I replaced with
calls to ecl_write_char.

This simplified the code, and does not seem to have had a noticeable
negative effect on the performance of printing symbols:

--8<---------------cut here---------------start------------->8---
> (defvar j (read-from-string "абракадабра"))

J
> (time (dotimes (i 1000000) (with-output-to-string (x1) (print j x1))))
--8<---------------cut here---------------end--------------->8---

Before:

real time : 5.349 secs
run time  : 6.827 secs
gc count  : 552 times
consed    : 1055997968 bytes
NIL

After:

real time : 5.278 secs
run time  : 7.756 secs
gc count  : 257 times
consed    : 1039999200 bytes
NIL

--
Vladimir Sedach
Software engineering services in Los Angeles https://oneofus.la

>From d491ecdd771404859869070cf6b9480b88aa005a Mon Sep 17 00:00:00 2001
From: Vladimir Sedach <v...@oneofus.la>
Date: Mon, 4 May 2020 19:32:50 -0700
Subject: [PATCH] write_symbol.d: Fix printing of Unicode symbol names

---
 CHANGELOG                          |  1 +
 src/c/printer/write_symbol.d       | 26 ++++++--------------------
 src/tests/normal-tests/unicode.lsp |  4 ++--
 3 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index a8049c07..3e4f7cb3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -29,6 +29,7 @@

 * Pending changes since 20.4.24
 ** Issues fixed
+- Printing Unicode symbol names
 - Compiling Unicode symbol names with :INVERT READTABLE-CASE
 - Pathname functions :CASE :COMMON argument with Unicode pathnames
 * 20.4.24 changes since 16.1.3
diff --git a/src/c/printer/write_symbol.d b/src/c/printer/write_symbol.d
index ab4c05e0..9ac7449d 100644
--- a/src/c/printer/write_symbol.d
+++ b/src/c/printer/write_symbol.d
@@ -100,37 +100,26 @@ needs_to_be_escaped(cl_object s, cl_object readtable, cl_object print_case)
   return 0;
 }

-static inline void
-buffer_write_char(char c, cl_object buffer, cl_object stream, cl_index *buffer_ndx, cl_index buffer_size) {
-  ecl_char_set(buffer, (*buffer_ndx)++, c);
-  if (*buffer_ndx >= buffer_size) {
-    si_fill_pointer_set(buffer, ecl_make_fixnum(buffer_size));
-    si_do_write_sequence(buffer, stream, ecl_make_fixnum(0), ECL_NIL);
-    *buffer_ndx = 0;
-  }
-}
-
 static void
 write_symbol_string(cl_object s, int action, cl_object print_case,
                     cl_object stream, bool escape)
 {
   cl_index i;
   bool capitalize;
+
   if (action == ecl_case_invert) {
     if (ecl_string_case(s) == 0)
       action = ecl_case_preserve;
   }
-  cl_object buffer = si_get_buffer_string();
-  cl_index buffer_size = ecl_fixnum(cl_array_total_size(buffer));
-  cl_index buffer_ndx = 0;
+
   if (escape)
-    buffer_write_char('|', buffer, stream, &buffer_ndx, buffer_size);
+    ecl_write_char('|', stream);
   capitalize = 1;
   for (i = 0;  i < s->base_string.fillp;  i++) {
     ecl_character c = ecl_char(s, i);
     if (escape) {
       if (c == '|' || c == '\\') {
-        buffer_write_char('\\', buffer, stream, &buffer_ndx, buffer_size);
+        ecl_write_char('\\', stream);
       }
     } else if (action != ecl_case_preserve) {
       if (ecl_upper_case_p(c)) {
@@ -155,13 +144,10 @@ write_symbol_string(cl_object s, int action, cl_object print_case,
         capitalize = !ecl_alphanumericp(c);
       }
     }
-    buffer_write_char(c, buffer, stream, &buffer_ndx, buffer_size);
+    ecl_write_char(c, stream);
   }
   if (escape)
-    buffer_write_char('|', buffer, stream, &buffer_ndx, buffer_size);
-  si_fill_pointer_set(buffer, ecl_make_fixnum(buffer_ndx));
-  si_do_write_sequence(buffer, stream, ecl_make_fixnum(0), ECL_NIL);
-  si_put_buffer_string(buffer);
+    ecl_write_char('|', stream);
 }

 static bool
diff --git a/src/tests/normal-tests/unicode.lsp b/src/tests/normal-tests/unicode.lsp
index 6a4bc887..e0f9f476 100644
--- a/src/tests/normal-tests/unicode.lsp
+++ b/src/tests/normal-tests/unicode.lsp
@@ -43,7 +43,7 @@

 ;;; Date: 2020-05-03
 ;;; From: Vladimir Sedach <v...@oneofus.la>
-;;; Fixed: 2020-05-03 (Vladimir Sedach)
+;;; Fixed: 2020-05-04 (Vladimir Sedach)
 ;;; Description:
 ;;;
 ;;;     Symbol names that contain Unicode are not printed correctly
@@ -55,7 +55,7 @@

 ;;; Date: 2020-05-03
 ;;; From: Vladimir Sedach <v...@oneofus.la>
-;;; Fixed: 2020-05-03 (Vladimir Sedach)
+;;; Fixed: 2020-05-04 (Vladimir Sedach)
 ;;; Description:
 ;;;
 ;;;     unicode.0003.print-unicode-symbols shows up as a compiler
--
2.20.1

Reply via email to