Hi Axel!
On Wed, Feb 10, 2021 at 03:31:21PM +0100, Axel Beckert wrote:
> Hi Michael,
>
> On Wed, Feb 10, 2021 at 08:59:15AM -0500, Michael Schröder wrote:
> > diff --git a/src/encoding.c b/src/encoding.c
> > index 11c3c41..e1ea364 100644
> > --- a/src/encoding.c
> > +++ b/src/encoding.c
> > @@ -1164,7 +1164,9 @@ void utf8_handle_comb(unsigned int c, struct mchar *mc)
> >  if (c1 >= 0xd800 && c1 < 0xe000)
> >  comb_tofront(root, c1 - 0xd800);
> >  i = combchars[root]->prev;
> > - if (c1 == i + 0xd800) {
> > + if (i == (unsigned int)root)
> > + i = combchars[root ^ 1]->prev; /* steal from other root */
> > + if (i == 0x800 || i == 0x801 || c1 == i + 0xd800) {
> >  /* completely full, can't recycle */
> >  mc->image = '?';
> >  mc->font = 0;
>
> Thanks, but this seems to break the actual output.

But isn't the output broken anyway? Two years ago, the line

    c = (c & 255) | (unsigned char)D_rend.font << 8;

was deleted from RAW_PUTCHAR, but the utf8_handle_comb function in
encoding.c was not adapted. Since then, combining characters cannot
have worked. Do you have a local patch for this?

> With that patch I now get "ÿ " after every wide character in the
> output. The beginning now looks like this for me (in the hope it will
> be passed properly through mail):
>
> 円ᆆᆿÿ 忿ᇎᆿÿ 忘ᆿᆿÿ 忿ᆾᆿÿ 応ᆿᆿÿ 忿ᆷᆿÿ 忑ᆿᆿÿ 忿ᇠᆿÿ 冺ᆿᆿÿ 忿ᇇᆿÿ 忟ᆿᆿÿ 忿ᆺᆿÿ 忳ᆿᆿÿ 忿ᅳᆿÿ 忣ᆿᆿÿ
> 忿ᇯᆿÿ 忇ᆿᆿÿ 忿ᇅᆿÿ
>
> Which reminds me a lot of
> https://savannah.gnu.org/bugs/index.php?31336 aka
> https://bugs.debian.org/600246 but made worse.

The patch is actually not 100% correct. The double width combining
chars should only be in the 0xdf00 - 0xdfff range. But there's also
the underlying issue that comb_tofront() moves entries between the
two lists. So here's a revised patch:

diff --git a/src/display.c b/src/display.c
index 0ec6c33..8a85b90 100644
--- a/src/display.c
+++ b/src/display.c
@@ -437,7 +437,7 @@ static void RAW_PUTCHAR(uint32_t c)
 if (D_x == D_width)
 D_x += D_AM ? 1 : -1;
 D_mbcs = 0;
- } else if (utf8_isdouble(c)) {
+ } else if (utf8_isdouble(c) || (c >= 0xdf00 && c < 0xe000)) {
 D_mbcs = c;
 D_x++;
 return;
diff --git a/src/encoding.c b/src/encoding.c
index 11c3c41..ed24d64 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -35,7 +35,7 @@
 static int encmatch(char *, char *);
 static int recode_char(int, int, int);
 static int recode_char_to_encoding(int, int);
-static void comb_tofront(int, int);
+static void comb_tofront(int);
 static int recode_char_dw(int, int *, int, int);
 static int recode_char_dw_to_encoding(int, int *, int);
@@ -1105,9 +1105,10 @@ bool utf8_iscomb(uint32_t c)
 return bisearch(c, combining, ARRAY_SIZE(combining) - 1);
 }
-static void comb_tofront(int root, int i)
+static void comb_tofront(int i)
 {
 for (;;) {
+ int root = i >= 0x700 ? 0x801 : 0x800;
 combchars[combchars[i]->prev]->next = combchars[i]->next;
 combchars[combchars[i]->next]->prev = combchars[i]->prev;
 combchars[i]->next = combchars[root]->next;
@@ -1162,9 +1163,9 @@ void utf8_handle_comb(unsigned int c, struct mchar *mc)
 if (i == combchars[root]->c2) {
 /* full, recycle old entry */
 if (c1 >= 0xd800 && c1 < 0xe000)
- comb_tofront(root, c1 - 0xd800);
+ comb_tofront(c1 - 0xd800);
 i = combchars[root]->prev;
- if (c1 == i + 0xd800) {
+ if (i == 0x800 || i == 0x801 || c1 == i + 0xd800) {
 /* completely full, can't recycle */
 mc->image = '?';
 mc->font = 0;
@@ -1180,9 +1181,9 @@ void utf8_handle_comb(unsigned int c, struct mchar *mc)
 }
 combchars[i]->c1 = c1;
 combchars[i]->c2 = c;
- mc->image = i & 0xff;
- mc->font = (i >> 8) + 0xd8;
- comb_tofront(root, i);
+ mc->image = 0xd800 + i;
+ mc->font = 0;
+ comb_tofront(i);
 }
 static int encmatch(char *s1, char *s2)

Cheers,
  Michael.

--
Michael Schroeder SUSE Software Solutions Germany GmbH
m...@suse.de GF: Felix Imendoerffer HRB 36809, AG Nuernberg
main(_){while(_=~getchar())putchar(~_-1/(~(_|32)/13*2-11)*13);}