Hi all, I am back to playing with Parrot for a while.
Discovered that the UTF8 encoding function was producing garbage courtesy of two macros in unicode.h; I have fixed these and added a test to string.t that does a transcode to UTF8. Patch below - but has already been committed, since nobody ever seems to have cancelled my privileges :-) -- Peter Gibbs EmKel Systems Index: include/parrot/unicode.h =================================================================== RCS file: /cvs/public/parrot/include/parrot/unicode.h,v retrieving revision 1.5 diff -u -r1.5 unicode.h --- include/parrot/unicode.h 21 Jul 2003 18:00:42 -0000 1.5 +++ include/parrot/unicode.h 20 Aug 2003 09:41:18 -0000 @@ -68,8 +68,8 @@ #define UTF8_IS_CONTINUATION(c) ((c) >= 0x80u && (c) <= 0xBFu) #define UTF8_IS_CONTINUED(c) ((c) & 0x80u) -#define UTF8_START_MARK(len) (0xFEu << (7-len)) -#define UTF8_START_MASK(len) (0x1Fu >> (len-2)) +#define UTF8_START_MARK(len) (len == 1 ? 0 : 0x7Eu << (7-len)) +#define UTF8_START_MASK(len) (len == 1 ? 0x7Fu : 0x1Fu >> (len-2)) #define UTF8_CONTINUATION_MARK 0x80u #define UTF8_ACCUMULATION_SHIFT 6 Index: t/op/string.t =================================================================== RCS file: /cvs/public/parrot/t/op/string.t,v retrieving revision 1.53 diff -u -r1.53 string.t --- t/op/string.t 8 Aug 2003 08:44:15 -0000 1.53 +++ t/op/string.t 20 Aug 2003 09:41:19 -0000 @@ -1,6 +1,6 @@ #! perl -w -use Parrot::Test tests => 116; +use Parrot::Test tests => 117; use Test::More; output_is( <<'CODE', <<OUTPUT, "set_s_s|sc" ); @@ -1976,6 +1976,18 @@ ABCY abc Y +OUTPUT + +output_is( <<'CODE', <<OUTPUT, "transcode to utf8"); + set S1, "ASCII is the same as UTF8\n" + find_encoding I1, "utf8" + transcode S2, S1, I1 + print S1 + print S2 + end +CODE +ASCII is the same as UTF8 +ASCII is the same as UTF8 OUTPUT # Set all string registers to values given by &$_[0](reg num)