Hi-

I've been playing with this wide char stuff, and I have a patch that
would move the encoding of characters to UCS-4.

This is completely useless on its own, because, in this
patch, the internal encoding of strings is still 8-bit chars, and,
thus, there is no way to use the wide characters in strings.

It is all pretty simple.  Since the internal representation of chars
becomes UCS-4, I used scm_t_uint32 as the char type, and I removed the
code that supported EBCDIC-encoded characters.  I changed the tables
of character names to deal with more names and discontiguous control
characters.  And, as a temporary kludge, I made a macro
SCM_MAKE_8BIT_CHAR to cast the 8-bit characters used in strings to a
scm_t_uint32.  Also, I used functions from the Gnulib unicase and
unictype modules for character properties, including a couple that
Bruno Haible of Gnulib was kind enough to create for me.

Thanks,

Mike

The gnulib invocation for this was

gnulib-tool --import --dir=. --lib=libgnu --source-base=lib
--m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux
--lgpl --libtool --macro-prefix=gl --no-vc-files 
alloca-opt autobuild count-one-bits extensions full-read full-write
strcase strftime unicase/tolower unicase/toupper
unictype/property-alphabetic unictype/property-lowercase
unictype/property-numeric unictype/property-uppercase
unictype/property-white-space
diff --git a/libguile/chars.c b/libguile/chars.c
index 909e11d..73387a9 100644
--- a/libguile/chars.c
+++ b/libguile/chars.c
@@ -29,8 +29,11 @@
 #include "libguile/chars.h"
 #include "libguile/srfi-14.h"
 
+#include "lib/unicase.h"
+#include "lib/unictype.h"
 
 
+
 SCM_DEFINE (scm_char_p, "char?", 1, 0, 0, 
             (SCM x),
            "Return @code{#t} iff @var{x} is a character, else @code{#f}.")
@@ -54,7 +57,7 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr, 
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than @var{y} in the ASCII 
sequence,\n"
+            "Return @code{#t} iff @var{x} is less than @var{y} in the Unicode 
sequence,\n"
             "else @code{#f}.")
 #define FUNC_NAME s_scm_char_less_p
 {
@@ -67,7 +70,7 @@ SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is less than or equal to @var{y} in 
the\n"
-            "ASCII sequence, else @code{#f}.")
+            "Imocpde sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -78,7 +81,7 @@ SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
 
 SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
ASCII\n"
+            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
Unicode\n"
             "sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_gr_p
 {
@@ -91,7 +94,7 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is greater than or equal to @var{y} 
in the\n"
-            "ASCII sequence, else @code{#f}.")
+            "Unicode sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
@@ -103,60 +106,64 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
 SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
             "Return @code{#t} iff @var{x} is the same character as @var{y} 
ignoring\n"
-            "case, else @code{#f}.")
+            "case, else @code{#f}.  Case is computed in the Unicode locale.")
 #define FUNC_NAME s_scm_char_ci_eq_p
 {
   SCM_VALIDATE_CHAR (1, x);
   SCM_VALIDATE_CHAR (2, y);
-  return scm_from_bool (scm_c_upcase(SCM_CHAR(x))==scm_c_upcase(SCM_CHAR(y)));
+  return scm_from_bool (uc_toupper(SCM_CHAR(x))==uc_toupper(SCM_CHAR(y)));
 }
 #undef FUNC_NAME
 
 SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than @var{y} in the ASCII 
sequence\n"
-            "ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
+            "than the Unicode uppercase form @var{y} in the Unicode 
sequence,\n"
+            "else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_less_p
 {
   SCM_VALIDATE_CHAR (1, x);
   SCM_VALIDATE_CHAR (2, y);
-  return scm_from_bool ((scm_c_upcase(SCM_CHAR(x))) < 
scm_c_upcase(SCM_CHAR(y)));
+  return scm_from_bool (uc_toupper (SCM_CHAR(x)) < uc_toupper (SCM_CHAR(y)));
 }
 #undef FUNC_NAME
 
 SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is less than or equal to @var{y} in 
the\n"
-            "ASCII sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
less\n"
+            "than or equal to the Unicode uppercase form of @var{y} in the\n"
+            "Unicode  sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_leq_p
 {
   SCM_VALIDATE_CHAR (1, x);
   SCM_VALIDATE_CHAR (2, y);
-  return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) <= 
scm_c_upcase(SCM_CHAR(y)));
+  return scm_from_bool (uc_toupper (SCM_CHAR(x)) <= uc_toupper (SCM_CHAR(y)));
 }
 #undef FUNC_NAME
 
 SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than @var{y} in the 
ASCII\n"
-            "sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
+            "than the Unicode uppercase form of @var{y} in the Unicode\n"
+            "sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_gr_p
 {
   SCM_VALIDATE_CHAR (1, x);
   SCM_VALIDATE_CHAR (2, y);
-  return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) > scm_c_upcase(SCM_CHAR(y)));
+  return scm_from_bool (uc_toupper (SCM_CHAR(x)) > uc_toupper (SCM_CHAR(y)));
 }
 #undef FUNC_NAME
 
 SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
              (SCM x, SCM y),
-            "Return @code{#t} iff @var{x} is greater than or equal to @var{y} 
in the\n"
-            "ASCII sequence ignoring case, else @code{#f}.")
+            "Return @code{#t} iff the Unicode uppercase form of @var{x} is 
greater\n"
+            "than or equal to the Unicode uppercase form of @var{y} in the\n"
+            "Unicode sequence, else @code{#f}.")
 #define FUNC_NAME s_scm_char_ci_geq_p
 {
   SCM_VALIDATE_CHAR (1, x);
   SCM_VALIDATE_CHAR (2, y);
-  return scm_from_bool (scm_c_upcase(SCM_CHAR(x)) >= 
scm_c_upcase(SCM_CHAR(y)));
+  return scm_from_bool (uc_toupper (SCM_CHAR(x)) >= uc_toupper (SCM_CHAR(y)));
 }
 #undef FUNC_NAME
 
@@ -166,7 +173,7 @@ SCM_DEFINE (scm_char_alphabetic_p, "char-alphabetic?", 1, 
0, 0,
            "Return @code{#t} iff @var{chr} is alphabetic, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_alphabetic_p
 {
-  return scm_char_set_contains_p (scm_char_set_letter, chr);
+  return scm_from_bool (uc_is_property_alphabetic (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -175,7 +182,7 @@ SCM_DEFINE (scm_char_numeric_p, "char-numeric?", 1, 0, 0,
            "Return @code{#t} iff @var{chr} is numeric, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_numeric_p
 {
-  return scm_char_set_contains_p (scm_char_set_digit, chr);
+  return scm_from_bool (uc_is_property_numeric (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -184,7 +191,7 @@ SCM_DEFINE (scm_char_whitespace_p, "char-whitespace?", 1, 
0, 0,
            "Return @code{#t} iff @var{chr} is whitespace, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_whitespace_p
 {
-  return scm_char_set_contains_p (scm_char_set_whitespace, chr);
+  return scm_from_bool (uc_is_property_white_space (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -195,7 +202,7 @@ SCM_DEFINE (scm_char_upper_case_p, "char-upper-case?", 1, 
0, 0,
            "Return @code{#t} iff @var{chr} is uppercase, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_upper_case_p
 {
-  return scm_char_set_contains_p (scm_char_set_upper_case, chr);
+  return scm_from_bool (uc_is_property_uppercase (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -205,7 +212,7 @@ SCM_DEFINE (scm_char_lower_case_p, "char-lower-case?", 1, 
0, 0,
            "Return @code{#t} iff @var{chr} is lowercase, else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_lower_case_p
 {
-  return scm_char_set_contains_p (scm_char_set_lower_case, chr);
+  return scm_from_bool (uc_is_property_lowercase (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -216,9 +223,8 @@ SCM_DEFINE (scm_char_is_both_p, "char-is-both?", 1, 0, 0,
            "Return @code{#t} iff @var{chr} is either uppercase or lowercase, 
else @code{#f}.\n")
 #define FUNC_NAME s_scm_char_is_both_p
 {
-  if (scm_is_true (scm_char_set_contains_p (scm_char_set_lower_case, chr)))
-    return SCM_BOOL_T;
-  return scm_char_set_contains_p (scm_char_set_upper_case, chr);
+  return scm_from_bool (uc_is_property_uppercase (SCM_CHAR(chr))
+                       || uc_is_property_lowercase (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -232,7 +238,7 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
 #define FUNC_NAME s_scm_char_to_integer
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return scm_from_ulong (SCM_CHAR(chr));
+  return scm_from_uint32 (SCM_CHAR(chr));
 }
 #undef FUNC_NAME
 
@@ -243,18 +249,29 @@ SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
            "Return the character at position @var{n} in the ASCII sequence.")
 #define FUNC_NAME s_scm_integer_to_char
 {
-  return SCM_MAKE_CHAR (scm_to_uchar (n));
+  scm_t_uint32 cn;
+
+  SCM_ASSERT (scm_is_integer (n), n, SCM_ARG1, FUNC_NAME);
+  cn = scm_to_uint32 (n);
+
+  if (cn > SCM_CODEPOINT_MAX)
+    scm_out_of_range (FUNC_NAME, n);
+
+  /* The Unicode surrogates are not true codepoints.  */
+  if (cn >= SCM_CODEPOINT_SURROGATE_START && cn <= SCM_CODEPOINT_SURROGATE_END)
+    scm_out_of_range (FUNC_NAME, n);
+
+  return SCM_MAKE_CHAR (cn);
 }
 #undef FUNC_NAME
 
-
 SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0, 
            (SCM chr),
            "Return the uppercase character version of @var{chr}.")
 #define FUNC_NAME s_scm_char_upcase
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return SCM_MAKE_CHAR (toupper (SCM_CHAR (chr)));
+  return SCM_MAKE_CHAR (uc_toupper (SCM_CHAR (chr)));
 }
 #undef FUNC_NAME
 
@@ -265,7 +282,7 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
 #define FUNC_NAME s_scm_char_downcase
 {
   SCM_VALIDATE_CHAR (1, chr);
-  return SCM_MAKE_CHAR (tolower (SCM_CHAR(chr)));
+  return SCM_MAKE_CHAR (uc_tolower (SCM_CHAR(chr)));
 }
 #undef FUNC_NAME
 
@@ -278,80 +295,74 @@ TODO: change name  to scm_i_.. ? --hwn
 */
 
 
-int
-scm_c_upcase (unsigned int c)
+scm_t_uint32
+scm_c_upcase (scm_t_uint32 c)
 {
-  if (c <= UCHAR_MAX)
-    return toupper (c);
+  if (c <= SCM_CODEPOINT_MAX)
+    return uc_toupper (c);
   else
     return c;
 }
 
 
-int
-scm_c_downcase (unsigned int c)
+scm_t_uint32
+scm_c_downcase (scm_t_uint32 c)
 {
-  if (c <= UCHAR_MAX)
-    return tolower (c);
+  if (c <= SCM_CODEPOINT_MAX)
+    return uc_tolower (c);
   else
     return c;
 }
 
-
-#ifdef _DCC
-# define ASCII
-#else
-# if (('\n'=='\025') && (' '=='\100') && ('a'=='\201') && ('A'=='\301'))
-#  define EBCDIC
-# endif /*  (('\n'=='\025') && (' '=='\100') && ('a'=='\201') && 
('A'=='\301')) */
-# if (('\n'=='\012') && (' '=='\040') && ('a'=='\141') && ('A'=='\101'))
-#  define ASCII
-# endif /*  (('\n'=='\012') && (' '=='\040') && ('a'=='\141') && 
('A'=='\101')) */
-#endif /* def _DCC */
-
-
-#ifdef EBCDIC
-char *const scm_charnames[] =
-{
-  "nul", "soh", "stx", "etx", "pf", "ht", "lc", "del",
-   0   , 0   , "smm", "vt", "ff", "cr", "so", "si",
-  "dle", "dc1", "dc2", "dc3", "res", "nl", "bs", "il",
-  "can", "em", "cc", 0   , "ifs", "igs", "irs", "ius",
-   "ds", "sos", "fs", 0   , "byp", "lf", "eob", "pre",
-   0   , 0   , "sm", 0   , 0   , "enq", "ack", "bel",
-   0   , 0   , "syn", 0   , "pn", "rs", "uc", "eot",
-   0   , 0   , 0   , 0   , "dc4", "nak", 0   , "sub",
-   "space", scm_s_newline, "tab", "backspace", "return", "page", "null"};
-
-const char scm_charnums[] =
-"\000\001\002\003\004\005\006\007\
-\010\011\012\013\014\015\016\017\
-\020\021\022\023\024\025\026\027\
-\030\031\032\033\034\035\036\037\
-\040\041\042\043\044\045\046\047\
-\050\051\052\053\054\055\056\057\
-\060\061\062\063\064\065\066\067\
-\070\071\072\073\074\075\076\077\
- \n\t\b\r\f\0";
-#endif /* def EBCDIC */
-#ifdef ASCII
-char *const scm_charnames[] =
-{
-  "nul","soh","stx","etx","eot","enq","ack","bel",
-   "bs", "ht", "newline", "vt", "np", "cr", "so", "si",
-  "dle","dc1","dc2","dc3","dc4","nak","syn","etb",
-  "can", "em","sub","esc", "fs", "gs", "rs", "us",
-  "space", "sp", "nl", "tab", "backspace", "return", "page", "null", "del"};
-const char scm_charnums[] =
-"\000\001\002\003\004\005\006\007\
-\010\011\012\013\014\015\016\017\
-\020\021\022\023\024\025\026\027\
-\030\031\032\033\034\035\036\037\
-  \n\t\b\r\f\0\177";
-#endif /* def ASCII */
+/* The abbreviated names for control characters.  */
+char *const scm_charnames[] = 
+  {
+    /* C0 controls */
+    "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
+    "bs",  "ht",  "newline",  "vt",  "np",  "cr",  "so",  "si",
+    "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", 
+    "can", "em",  "sub", "esc", "fs",  "gs",  "rs",  "us",
+    "del",
+    /* C1 controls */
+    "bph", "nbh", "ind", "nel", "ssa", "esa", 
+    "hts", "htj", "vts", "pld", "plu", "ri" , "ss2", "ss3",
+    "dcs", "pu1", "pu2", "sts", "cch", "mw" , "spa", "epa",
+    "sos", "sci", "csi", "st",  "osc", "pm",  "apc"
+  };
+
+const scm_t_uint32 scm_charnums[] = 
+  {
+    /* C0 controls */
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x7f,
+    /* C1 controls */
+    0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+    0x98, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f
+  };
 
 int scm_n_charnames = sizeof (scm_charnames) / sizeof (char *);
 
+/* There are common aliases for control characters.  */
+char *const scm_alt_charnames[] = 
+  {
+    "lf", "ff", "space", "sp", "nl", "tab", "backspace",
+    "return", "page", "null", "nbsp", "shy"
+  };
+  
+const scm_t_uint32 scm_alt_charnums[] = 
+  {
+    0x0a, 0x0c, 0x20, 0x20, 0x0a, 0x09, 0x08,
+    0x0d, 0x0c, 0x00, 0xa0, 0xad
+  };
+
+
+int scm_n_alt_charnames = sizeof (scm_alt_charnames) / sizeof (char *);
+
 
 
 
diff --git a/libguile/chars.h b/libguile/chars.h
index 97c611a..ec7e874 100644
--- a/libguile/chars.h
+++ b/libguile/chars.h
@@ -25,17 +25,27 @@
 #include "libguile/__scm.h"
 
 
+
+#define SCM_CODEPOINT_MAX (0x10FFFF)
+#define SCM_CODEPOINT_SURROGATE_START (0xD800)
+#define SCM_CODEPOINT_SURROGATE_END (0xDFFF)
+
+
 /* Immediate Characters
  */
 #define SCM_CHARP(x) (SCM_ITAG8(x) == scm_tc8_char)
-#define SCM_CHAR(x) ((unsigned int)SCM_ITAG8_DATA(x))
-#define SCM_MAKE_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (unsigned char) (x), 
scm_tc8_char)
+#define SCM_CHAR(x) ((scm_t_uint32)SCM_ITAG8_DATA(x))
+#define SCM_MAKE_8BIT_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (scm_t_uint32) 
(unsigned char) (x), scm_tc8_char)
+#define SCM_MAKE_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (scm_t_uint32) (x), 
scm_tc8_char)
 
 
 
 SCM_API char *const scm_charnames[];
 SCM_API int scm_n_charnames;
-SCM_API const char scm_charnums[];
+SCM_API const scm_t_uint32 scm_charnums[];
+SCM_API char *const scm_alt_charnames[];
+SCM_API int scm_n_alt_charnames;
+SCM_API const scm_t_uint32 scm_alt_charnums[];
 
 
 
@@ -60,10 +70,9 @@ SCM_API SCM scm_char_to_integer (SCM chr);
 SCM_API SCM scm_integer_to_char (SCM n);
 SCM_API SCM scm_char_upcase (SCM chr);
 SCM_API SCM scm_char_downcase (SCM chr);
-SCM_API int scm_c_upcase (unsigned int c);
-SCM_API int scm_c_downcase (unsigned int c);
+SCM_API scm_t_uint32 scm_c_upcase (scm_t_uint32 c);
+SCM_API scm_t_uint32 scm_c_downcase (scm_t_uint32 c);
 SCM_INTERNAL void scm_init_chars (void);
-
 #endif  /* SCM_CHARS_H */
 
 /*
--- a/libguile/hash.c
+++ b/libguile/hash.c
@@ -63,7 +63,7 @@ scm_hasher(SCM obj, unsigned long n, size_t d)
     return SCM_I_INUM(obj) % n;   /* SCM_INUMP(obj) */
   case scm_tc3_imm24:
     if (SCM_CHARP(obj))
-      return (unsigned)(scm_c_downcase(SCM_CHAR(obj))) % n;
+      return (unsigned long)(scm_c_downcase(SCM_CHAR(obj))) % n;
     switch (SCM_UNPACK (obj)) {
 #ifndef SICP
     case SCM_UNPACK(SCM_EOL):
diff --git a/libguile/load.c b/libguile/load.c
index 5ca4e07..d14c04c 100644
--- a/libguile/load.c
+++ b/libguile/load.c
@@ -182,9 +182,9 @@ SCM_DEFINE (scm_parse_path, "parse-path", 1, 1, 0,
 #define FUNC_NAME s_scm_parse_path
 {
 #ifdef __MINGW32__
-  SCM sep = SCM_MAKE_CHAR (';');
+  SCM sep = SCM_MAKE_8BIT_CHAR (';');
 #else
-  SCM sep = SCM_MAKE_CHAR (':');
+  SCM sep = SCM_MAKE_8BIT_CHAR (':');
 #endif
   
   if (SCM_UNBNDP (tail))
diff --git a/libguile/print.c b/libguile/print.c
index d218837..0dcb75b 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -435,21 +435,41 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
     case scm_tc3_imm24:
       if (SCM_CHARP (exp))
        {
-         long i = SCM_CHAR (exp);
+         scm_t_uint32 i = SCM_CHAR (exp);
+         int c;
+         int found = 0;
 
          if (SCM_WRITINGP (pstate))
            {
              scm_puts ("#\\", port);
-             if ((i >= 0) && (i <= ' ') && scm_charnames[i])
-               scm_puts (scm_charnames[i], port);
-#ifndef EBCDIC
-             else if (i == '\177')
-               scm_puts (scm_charnames[scm_n_charnames - 1], port);
-#endif
-             else if (i < 0 || i > '\177')
-               scm_intprint (i, 8, port);
-             else
-               scm_putc (i, port);
+             for (c = 0; c < scm_n_charnames; c++)
+               {
+                 if (scm_charnums[c] == i)
+                   {
+                     scm_puts (scm_charnames[c], port);
+                     found = 1;
+                     break;
+                   }
+               }
+             if (!found)
+               {
+                 for (c = 0; c < scm_n_alt_charnames; c++)
+                   {
+                     if (scm_alt_charnums[c] == i)
+                       {
+                         scm_puts (scm_alt_charnames[c], port);
+                         found = 1;
+                         break;
+                       }
+                   }
+               }
+             if (!found)
+               {
+                 if (i < 0 || i > 127)
+                   scm_intprint (i, 8, port);
+                 else
+                   scm_putc (i, port);
+               }
            }
          else
            scm_putc (i, port);
@@ -1038,14 +1058,14 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
            continue;
          default:
            SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use 
(ice-9 format) instead",
-                           scm_list_1 (SCM_MAKE_CHAR (*p)));
+                           scm_list_1 (SCM_MAKE_8BIT_CHAR (*p)));
            
          }
 
 
        if (!scm_is_pair (args))
          SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A",
-                         scm_list_1 (SCM_MAKE_CHAR (*p)));
+                         scm_list_1 (SCM_MAKE_8BIT_CHAR (*p)));
                                        
        scm_lfwrite (start, p - start - 1, port);
        /* we pass destination here */
diff --git a/libguile/rdelim.c b/libguile/rdelim.c
index c9cc016..4a45600 100644
--- a/libguile/rdelim.c
+++ b/libguile/rdelim.c
@@ -223,7 +223,7 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
     {
       if (s[slen-1] == '\n')
        {
-         term = SCM_MAKE_CHAR ('\n');
+         term = SCM_MAKE_8BIT_CHAR ('\n');
          s[slen-1] = '\0';
          line = scm_take_locale_stringn (s, slen-1);
          SCM_INCLINE (port);
diff --git a/libguile/read.c b/libguile/read.c
index 47b8004..0cf6dc4 100644
--- a/libguile/read.c
+++ b/libguile/read.c
@@ -750,21 +750,34 @@ scm_read_character (int chr, SCM port)
 
   if (*charname >= '0' && *charname < '8')
     {
-      /* Dirk:FIXME::  This type of character syntax is not R5RS
-       * compliant.  Further, it should be verified that the constant
-       * does only consist of octal digits.  Finally, it should be
-       * checked whether the resulting fixnum is in the range of
-       * characters.  */
+      /* FIXME:: This type of character syntax is not R5RS
+       * compliant.  */
+      for (c = 0; c < charname_len; c++)
+       {
+         if (charname[c] < '0' || charname[c] > '8')
+           scm_i_input_error (FUNC_NAME, port, "invalid character escape ~a",
+                              scm_list_1 (scm_from_locale_stringn (charname, 
+                                                                   
charname_len)));
+       }
       SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
-      if (SCM_I_INUMP (p))
-       return SCM_MAKE_CHAR (SCM_I_INUM (p));
+      if (scm_is_integer (p)
+         && scm_is_true (scm_geq_p (p, scm_from_int (0)))
+         && scm_is_true (scm_leq_p (p, scm_from_int (SCM_CODEPOINT_MAX)))
+         && (scm_is_true (scm_less_p (p, scm_from_int 
(SCM_CODEPOINT_SURROGATE_START)))
+             || scm_is_true (scm_gr_p (p, scm_from_int 
(SCM_CODEPOINT_SURROGATE_END)))))
+       return scm_integer_to_char (p);
     }
 
   for (c = 0; c < scm_n_charnames; c++)
-    if (scm_charnames[c]
+    if ((strlen (scm_charnames[c]) == charname_len)
        && (!strncasecmp (scm_charnames[c], charname, charname_len)))
       return SCM_MAKE_CHAR (scm_charnums[c]);
 
+  for (c = 0; c < scm_n_alt_charnames; c++)
+    if ((strlen (scm_alt_charnames[c]) == charname_len)
+       && (!strncasecmp (scm_alt_charnames[c], charname, charname_len)))
+      return SCM_MAKE_CHAR (scm_alt_charnums[c]);
+
  char_error:
   scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
                     scm_list_1 (scm_from_locale_stringn (charname,
diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c
index c8ca780..e8e65ca 100644
--- a/libguile/srfi-13.c
+++ b/libguile/srfi-13.c
@@ -141,7 +141,7 @@ SCM_DEFINE (scm_string_any, "string-any-c-code", 2, 2, 0,
 
       while (cstart < cend)
         {
-          res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+          res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
           if (scm_is_true (res))
             break;
          cstr = scm_i_string_chars (s);
@@ -210,7 +210,7 @@ SCM_DEFINE (scm_string_every, "string-every-c-code", 2, 2, 
0,
 
       while (cstart < cend)
         {
-          res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+          res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
           if (scm_is_false (res))
             break;
           cstr = scm_i_string_chars (s);
@@ -277,7 +277,7 @@ SCM_DEFINE (scm_substring_to_list, "string->list", 1, 2, 0,
   while (cstart < cend)
     {
       cend--;
-      result = scm_cons (SCM_MAKE_CHAR (cstr[cend]), result);
+      result = scm_cons (SCM_MAKE_8BIT_CHAR (cstr[cend]), result);
       cstr = scm_i_string_chars (str);
     }
   scm_remember_upto_here_1 (str);
@@ -756,7 +756,7 @@ SCM_DEFINE (scm_string_trim, "string-trim", 1, 3, 0,
        {
          SCM res;
 
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
          if (scm_is_false (res))
            break;
          cstr = scm_i_string_chars (s);
@@ -834,7 +834,7 @@ SCM_DEFINE (scm_string_trim_right, "string-trim-right", 1, 
3, 0,
        {
          SCM res;
 
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cend - 1]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cend - 1]));
          if (scm_is_false (res))
            break;
          cstr = scm_i_string_chars (s);
@@ -930,7 +930,7 @@ SCM_DEFINE (scm_string_trim_both, "string-trim-both", 1, 3, 
0,
        {
          SCM res;
 
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
          if (scm_is_false (res))
            break;
          cstr = scm_i_string_chars (s);
@@ -940,7 +940,7 @@ SCM_DEFINE (scm_string_trim_both, "string-trim-both", 1, 3, 
0,
        {
          SCM res;
 
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cend - 1]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cend - 1]));
          if (scm_is_false (res))
            break;
          cstr = scm_i_string_chars (s);
@@ -1964,7 +1964,7 @@ SCM_DEFINE (scm_string_index, "string-index", 2, 2, 0,
       while (cstart < cend)
        {
          SCM res;
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
          if (scm_is_true (res))
            goto found;
          cstr = scm_i_string_chars (s);
@@ -2032,7 +2032,7 @@ SCM_DEFINE (scm_string_index_right, "string-index-right", 
2, 2, 0,
        {
          SCM res;
          cend--;
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cend]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cend]));
          if (scm_is_true (res))
            goto found;
          cstr = scm_i_string_chars (s);
@@ -2120,7 +2120,7 @@ SCM_DEFINE (scm_string_skip, "string-skip", 2, 2, 0,
       while (cstart < cend)
        {
          SCM res;
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
          if (scm_is_false (res))
            goto found;
          cstr = scm_i_string_chars (s);
@@ -2190,7 +2190,7 @@ SCM_DEFINE (scm_string_skip_right, "string-skip-right", 
2, 2, 0,
        {
          SCM res;
          cend--;
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cend]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cend]));
          if (scm_is_false (res))
            goto found;
          cstr = scm_i_string_chars (s);
@@ -2259,7 +2259,7 @@ SCM_DEFINE (scm_string_count, "string-count", 2, 2, 0,
       while (cstart < cend)
        {
          SCM res;
-         res = pred_tramp (char_pred, SCM_MAKE_CHAR (cstr[cstart]));
+         res = pred_tramp (char_pred, SCM_MAKE_8BIT_CHAR (cstr[cstart]));
          if (scm_is_true (res))
            count++;
          cstr = scm_i_string_chars (s);
@@ -2513,7 +2513,7 @@ string_titlecase_x (SCM str, size_t start, size_t end)
   sz = (unsigned char *) scm_i_string_writable_chars (str);
   for(i = start; i < end;  i++)
     {
-      if (scm_is_true (scm_char_alphabetic_p (SCM_MAKE_CHAR (sz[i]))))
+      if (scm_is_true (scm_char_alphabetic_p (SCM_MAKE_8BIT_CHAR (sz[i]))))
        {
          if (!in_word)
            {
@@ -2843,7 +2843,7 @@ SCM_DEFINE (scm_string_fold, "string-fold", 3, 2, 0,
   while (cstart < cend)
     {
       unsigned int c = (unsigned char) cstr[cstart];
-      result = scm_call_2 (kons, SCM_MAKE_CHAR (c), result);
+      result = scm_call_2 (kons, SCM_MAKE_8BIT_CHAR (c), result);
       cstr = scm_i_string_chars (s);
       cstart++;
     }
@@ -2874,7 +2874,7 @@ SCM_DEFINE (scm_string_fold_right, "string-fold-right", 
3, 2, 0,
   while (cstart < cend)
     {
       unsigned int c  = (unsigned char) cstr[cend - 1];
-      result = scm_call_2 (kons, SCM_MAKE_CHAR (c), result);
+      result = scm_call_2 (kons, SCM_MAKE_8BIT_CHAR (c), result);
       cstr = scm_i_string_chars (s);
       cend--;
     }
@@ -3028,7 +3028,7 @@ SCM_DEFINE (scm_string_for_each, "string-for-each", 2, 2, 
0,
   while (cstart < cend)
     {
       unsigned int c = (unsigned char) cstr[cstart];
-      proc_tramp (proc, SCM_MAKE_CHAR (c));
+      proc_tramp (proc, SCM_MAKE_8BIT_CHAR (c));
       cstr = scm_i_string_chars (s);
       cstart++;
     }
@@ -3425,7 +3425,7 @@ SCM_DEFINE (scm_string_filter, "string-filter", 2, 2, 0,
       while (idx < cend)
        {
          SCM res, ch;
-         ch = SCM_MAKE_CHAR (cstr[idx]);
+         ch = SCM_MAKE_8BIT_CHAR (cstr[idx]);
          res = pred_tramp (char_pred, ch);
          if (scm_is_true (res))
            ls = scm_cons (ch, ls);
@@ -3561,7 +3561,7 @@ SCM_DEFINE (scm_string_delete, "string-delete", 2, 2, 0,
       idx = cstart;
       while (idx < cend)
        {
-         SCM res, ch = SCM_MAKE_CHAR (cstr[idx]);
+         SCM res, ch = SCM_MAKE_8BIT_CHAR (cstr[idx]);
          res = pred_tramp (char_pred, ch);
          if (scm_is_false (res))
            ls = scm_cons (ch, ls);
diff --git a/libguile/strings.c b/libguile/strings.c
index c138026..e4cc48c 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -681,7 +681,7 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
   else
     scm_out_of_range (NULL, k);
 
-  return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
+  return SCM_MAKE_8BIT_CHAR (scm_i_string_chars (str)[idx]);
 }
 #undef FUNC_NAME
 
@@ -690,7 +690,7 @@ scm_c_string_ref (SCM str, size_t p)
 {
   if (p >= scm_i_string_length (str))
     scm_out_of_range (NULL, scm_from_size_t (p));
-  return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
+  return SCM_MAKE_8BIT_CHAR (scm_i_string_chars (str)[p]);
 }
 
 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
diff --git a/libguile/struct.c b/libguile/struct.c
index cae0f31..cdbe8c9 100644
--- a/libguile/struct.c
+++ b/libguile/struct.c
@@ -87,7 +87,7 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 
0, 0,
            break;
          default:
            SCM_MISC_ERROR ("unrecognized field type: ~S", 
-                           scm_list_1 (SCM_MAKE_CHAR (field_desc[x])));
+                           scm_list_1 (SCM_MAKE_8BIT_CHAR (field_desc[x])));
          }
 
        switch (field_desc[x + 1])
@@ -110,7 +110,7 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 
1, 0, 0,
            break;
          default:
            SCM_MISC_ERROR ("unrecognized ref specification: ~S",
-                           scm_list_1 (SCM_MAKE_CHAR (field_desc[x + 1])));
+                           scm_list_1 (SCM_MAKE_8BIT_CHAR (field_desc[x + 
1])));
          }
 #if 0
        if (field_desc[x] == 'd')
@@ -707,7 +707,7 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
 
     default:
       SCM_MISC_ERROR ("unrecognized field type: ~S",
-                     scm_list_1 (SCM_MAKE_CHAR (field_type)));
+                     scm_list_1 (SCM_MAKE_8BIT_CHAR (field_type)));
     }
 
   return answer;
@@ -784,7 +784,7 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
 
     default:
       SCM_MISC_ERROR ("unrecognized field type: ~S",
-                     scm_list_1 (SCM_MAKE_CHAR (field_type)));
+                     scm_list_1 (SCM_MAKE_8BIT_CHAR (field_type)));
     }
 
   return val;
diff --git a/libguile/unif.c b/libguile/unif.c
index daf0850..78bd6ed 100644
--- a/libguile/unif.c
+++ b/libguile/unif.c
@@ -171,7 +171,7 @@ prototype_to_type (SCM proto)
 
   if (scm_is_eq (proto, SCM_BOOL_T))
     type_name = "b";
-  else if (scm_is_eq (proto, SCM_MAKE_CHAR (0)))
+  else if (scm_is_eq (proto, SCM_MAKE_8BIT_CHAR (0)))
     type_name = "s8";
   else if (SCM_CHARP (proto))
     type_name = "a";
@@ -215,9 +215,9 @@ scm_i_get_old_prototype (SCM uvec)
   if (scm_is_bitvector (uvec))
     return SCM_BOOL_T;
   else if (scm_is_string (uvec))
-    return SCM_MAKE_CHAR ('a');
+    return SCM_MAKE_8BIT_CHAR ('a');
   else if (scm_is_true (scm_s8vector_p (uvec)))
-    return SCM_MAKE_CHAR ('\0');
+    return SCM_MAKE_8BIT_CHAR ('\0');
   else if (scm_is_true (scm_s16vector_p (uvec)))
     return scm_sym_s;
   else if (scm_is_true (scm_u32vector_p (uvec)))
@@ -802,7 +802,7 @@ SCM_DEFINE (scm_dimensions_to_uniform_array, 
"dimensions->uniform-array", 2, 1,
       /* Using #\nul as the prototype yields a s8 array, but numeric
         arrays can't store characters, so we have to special case this.
       */
-      if (scm_is_eq (prot, SCM_MAKE_CHAR (0)))
+      if (scm_is_eq (prot, SCM_MAKE_8BIT_CHAR (0)))
        fill = scm_from_int (0);
       else
        fill = prot;
@@ -1106,7 +1106,7 @@ SCM_DEFINE (scm_enclose_array, "enclose-array", 1, 0, 1,
   noutr = ndim - ninr;
   if (noutr < 0)
     SCM_WRONG_NUM_ARGS ();
-  axv = scm_make_string (scm_from_int (ndim), SCM_MAKE_CHAR (0));
+  axv = scm_make_string (scm_from_int (ndim), SCM_MAKE_8BIT_CHAR (0));
   res = scm_i_make_ra (noutr, 1);
   SCM_I_ARRAY_BASE (res) = SCM_I_ARRAY_BASE (ra_inr);
   SCM_I_ARRAY_V (res) = ra_inr;
@@ -1118,7 +1118,7 @@ SCM_DEFINE (scm_enclose_array, "enclose-array", 1, 0, 1,
       SCM_I_ARRAY_DIMS (ra_inr)[k].lbnd = s[j].lbnd;
       SCM_I_ARRAY_DIMS (ra_inr)[k].ubnd = s[j].ubnd;
       SCM_I_ARRAY_DIMS (ra_inr)[k].inc = s[j].inc;
-      scm_c_string_set_x (axv, j, SCM_MAKE_CHAR (1));
+      scm_c_string_set_x (axv, j, SCM_MAKE_8BIT_CHAR (1));
     }
   c_axv = scm_i_string_chars (axv);
   for (j = 0, k = 0; k < noutr; k++, j++)
diff --git a/libguile/vports.c b/libguile/vports.c
index 564f0e7..de1a8f7 100644
--- a/libguile/vports.c
+++ b/libguile/vports.c
@@ -59,7 +59,7 @@ sf_flush (SCM port)
     {
       /* write the byte. */
       scm_call_1 (SCM_SIMPLE_VECTOR_REF (stream, 0),
-                 SCM_MAKE_CHAR (*pt->write_buf));
+                 SCM_MAKE_8BIT_CHAR (*pt->write_buf));
       pt->write_pos = pt->write_buf;
   
       /* flush the output.  */

Reply via email to