commit 59936c7d972587a47d61161279bb8e8abc0b02f3
Author:     NRK <n...@disroot.org>
AuthorDate: Thu Jul 4 21:27:47 2024 +0000
Commit:     Hiltjo Posthuma <hil...@codemadness.org>
CommitDate: Sun Jul 14 11:42:58 2024 +0200

    render invalid utf8 sequences as U+FFFD
    
    previously, drw_text would do the width calculations as if
    invalid utf8 sequences were replaced with U+FFFD, but would still
    pass the invalid utf8 sequence to xft to render, and xft would
    just cut the text off at the first invalid byte.
    
    this change makes invalid utf8 render as U+FFFD and avoids
    sending invalid sequences to xft. the following can be used to
    check the behavior before and after the patch:
    
            $ printf "0\xef1234567\ntest" | dmenu
    
    Ref: https://lists.suckless.org/dev/2407/35646.html

diff --git a/drw.c b/drw.c
index eb71da7..f151ae5 100644
--- a/drw.c
+++ b/drw.c
@@ -237,7 +237,8 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
        XftResult result;
        int charexists = 0, overflow = 0;
        /* keep track of a couple codepoints for which we have no match. */
-       static unsigned int nomatches[128], ellipsis_width;
+       static unsigned int nomatches[128], ellipsis_width, invalid_width;
+       static const char invalid[] = "�";
 
        if (!drw || (render && (!drw->scheme || !w)) || !text || !drw->fonts)
                return 0;
@@ -257,6 +258,10 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
        usedfont = drw->fonts;
        if (!ellipsis_width && render)
                ellipsis_width = drw_fontset_getwidth(drw, "...");
+       if (!invalid_width) {
+               invalid_width = -1; /* stop infinite recursion */
+               invalid_width = drw_fontset_getwidth(drw, invalid);
+       }
        while (1) {
                ew = ellipsis_len = utf8err = utf8charlen = utf8strlen = 0;
                utf8str = text;
@@ -284,9 +289,9 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                                else
                                                        utf8strlen = 
ellipsis_len;
                                        } else if (curfont == usedfont) {
-                                               utf8strlen += utf8charlen;
                                                text += utf8charlen;
-                                               ew += tmpw;
+                                               utf8strlen += utf8err ? 0 : 
utf8charlen;
+                                               ew += utf8err ? 0 : tmpw;
                                        } else {
                                                nextfont = curfont;
                                        }
@@ -294,7 +299,7 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                                }
                        }
 
-                       if (overflow || !charexists || nextfont)
+                       if (overflow || !charexists || nextfont || utf8err)
                                break;
                        else
                                charexists = 0;
@@ -309,6 +314,12 @@ drw_text(Drw *drw, int x, int y, unsigned int w, unsigned int h, unsigned int lp
                        x += ew;
                        w -= ew;
                }
+               if (utf8err && (!render || invalid_width < w)) {
+                       if (render)
+                               drw_text(drw, x, y, w, h, 0, invalid, invert);
+                       x += invalid_width;
+                       w -= invalid_width;
+               }
                if (render && overflow)
                        drw_text(drw, ellipsis_x, y, ellipsis_w, h, 0, "...", 
invert);
 

Reply via email to