Hello,

This patch adds support for UTF-8 editing in both emacs mode and vi mode
of ksh, and fixes some UTF-8 encoding issues.

Is it OK?

Index: bin/ksh/emacs.c
===================================================================
RCS file: /cvs/src/bin/ksh/emacs.c,v
retrieving revision 1.89
diff -u -p -u -r1.89 emacs.c
--- bin/ksh/emacs.c    9 Oct 2021 21:38:00 -0000    1.89
+++ bin/ksh/emacs.c    12 Aug 2022 01:19:52 -0000
@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 #ifndef SMALL
 # include <term.h>
 # include <curses.h>
@@ -102,6 +103,7 @@ static int    x_adj_done;
 static int    xx_cols;
 static int    x_col;
 static int    x_displen;
+static int    x_edp_len;
 static int    x_arg;        /* general purpose arg */
 static int    x_arg_defaulted;/* x_arg not explicitly set; defaulted to 1 */

@@ -133,6 +135,7 @@ static int    x_size_str(char *);
 static int    x_size(int);
 static void    x_zots(char *);
 static void    x_zotc(int);
+static void     x_zotu8c(const char *, int);
 static void    x_load_hist(char **);
 static int    x_search(char *, int, int);
 static int    x_match(char *, char *);
@@ -143,12 +146,15 @@ static void    x_e_ungetc(int);
 static int    x_e_getc(void);
 static int    x_e_getu8(char *, int);
 static void    x_e_putc(int);
+static void     x_e_putu8c(const char *, int);
 static void    x_e_puts(const char *);
 static int    x_comment(int);
 static int    x_fold_case(int);
 static char    *x_lastcp(void);
 static void    do_complete(int, Comp_type);
 static int    isu8cont(unsigned char);
+static int    u8code(const char *);
+static int    u8mblen(unsigned char);

 /* proto's for keybindings */
 static int    x_abort(int);
@@ -276,6 +282,54 @@ isu8cont(unsigned char c)
 }

 int
+u8code(const char *str)
+{
+    /*
+     * Decode the UTF-8 sequence at str into a code point.  A byte that
+     * is not a valid lead byte is returned unchanged, and so is a lead
+     * byte whose sequence is cut short (for instance by the terminating
+     * NUL), so decoding never reads past the end of the string.
+     */
+    const unsigned char *p = (const unsigned char *)str;
+    int wc, i, len;
+
+    if ((*p & 0xf8) == 0xf0 && *p < 0xf5) {
+        wc = *p & 0x07;
+        len = 4;
+    } else if ((*p & 0xf0) == 0xe0) {
+        wc = *p & 0x0f;
+        len = 3;
+    } else if ((*p & 0xe0) == 0xc0 && *p > 0xc1) {
+        wc = *p & 0x1f;
+        len = 2;
+    } else
+        return *p;
+    for (i = 1; i < len; i++) {
+        if ((p[i] & 0xc0) != 0x80)
+            return *p;    /* truncated or malformed sequence */
+        wc = (wc << 6) | (p[i] & 0x3f);
+    }
+    return wc;
+}
+
+int
+u8mblen(unsigned char c)
+{
+    /*
+     * Length in bytes of the UTF-8 sequence announced by lead byte c.
+     * Continuation bytes and invalid lead bytes (0xc0, 0xc1, 0xf5 and
+     * above) are reported as single-byte characters.
+     */
+    if ((c & 0xf8) == 0xf0 && c < 0xf5)
+        return 4;
+    if ((c & 0xf0) == 0xe0)
+        return 3;
+    if ((c & 0xe0) == 0xc0 && c > 0xc1)
+        return 2;
+    return 1;
+}
+
+int
 x_emacs(char *buf, size_t len)
 {
     struct kb_entry        *k, *kmatch = NULL;
@@ -449,6 +503,7 @@ x_ins(char *s)
 {
     char    *cp = xcp;
     int    adj = x_adj_done;
+    int    wc;

     if (x_do_ins(s, strlen(s)) < 0)
         return -1;
@@ -462,8 +517,12 @@ x_ins(char *s)
     x_zots(cp);
     if (adj == x_adj_done) {    /* has x_adjust() been called? */
         /* no */
-        for (cp = xlp; cp > xcp; )
-            x_bs(*--cp);
+        for (cp = xlp; cp > xcp; ) {
+            if (isu8cont(*--cp))
+                continue;
+            wc = u8code(cp);
+            x_bs(wc);
+        }
     }

     x_adj_ok = 1;
@@ -511,6 +570,7 @@ x_delete(int nc, int push)
 {
     int    i,j;
     char    *cp;
+    int    len, wc;

     if (nc == 0)
         return;
@@ -531,8 +591,12 @@ x_delete(int nc, int push)
     cp = xcp;
     j = 0;
     i = nc;
-    while (i--) {
-        j += x_size((unsigned char)*cp++);
+    while (i) {
+        wc = u8code(cp);
+        len = u8mblen(*cp);
+        j += x_size(wc);
+        cp += len;
+        i -= len;
     }
     memmove(xcp, xcp+nc, xep - xcp + 1);    /* Copies the null */
     x_adj_ok = 0;            /* don't redraw */
@@ -555,8 +619,12 @@ x_delete(int nc, int push)
     /*x_goto(xcp);*/
     x_adj_ok = 1;
     xlp_valid = false;
-    for (cp = x_lastcp(); cp > xcp; )
-        x_bs(*--cp);
+    for (cp = x_lastcp(); cp > xcp; ) {
+        if (isu8cont(*--cp))
+            continue;
+        wc = u8code(cp);
+        x_bs(wc);
+    }

     return;
 }
@@ -639,16 +707,34 @@ x_fword(void)
 static void
 x_goto(char *cp)
 {
-    if (cp < xbp || cp >= (xbp + x_displen)) {
+    char *ecp;    /* end of the text that fits in x_displen columns from xbp */
+    int i, len, wc;
+
+    for (i = 0, ecp = xbp; ecp < xep && i < x_displen; ecp += len) {
+        wc = u8code(ecp);
+        len = u8mblen(*ecp);
+        if (i + x_size(wc) >= x_displen)    /* this character would reach the limit */
+            break;
+        i += x_size(wc);
+    }
+
+    if (cp < xbp || cp >= ecp) {
         /* we are heading off screen */
         xcp = cp;
         x_adjust();
     } else if (cp < xcp) {        /* move back */
-        while (cp < xcp)
-            x_bs((unsigned char)*--xcp);
+        while (cp < xcp) {
+            if (isu8cont(*--xcp))    /* step over continuation bytes */
+                continue;
+            wc = u8code(xcp);
+            x_bs(wc);
+        }
     } else if (cp > xcp) {        /* move forward */
-        while (cp > xcp)
-            x_zotc((unsigned char)*xcp++);
+        while (cp > xcp) {
+            len = u8mblen(*xcp);    /* advance by whole characters */
+            x_zotu8c(xcp, len);
+            xcp += len;
+        }
     }
 }

@@ -666,27 +752,37 @@ static int
 x_size_str(char *cp)
 {
     int size = 0;
-    while (*cp)
-        size += x_size(*cp++);
+    int len;    /* bytes left in the current character */
+    while (*cp) {    /* sum the display width of every character */
+        size += x_size(u8code(cp));
+        /* Advance one character, but never beyond the terminating NUL. */
+        for (len = u8mblen(*cp); len > 0 && *cp; len--)
+            cp++;
+    }
     return size;
 }

 static int
 x_size(int c)
 {
+    int w;        /* display width, in columns, of code point c */
+    if (c=='\b')    /* backspace counts as minus one column */
+        return -1;
     if (c=='\t')
         return 4;    /* Kludge, tabs are always four spaces. */
-    if (iscntrl(c))        /* control char */
+    if (c >= 0 && c <= 0xff && iscntrl(c))    /* ctype only takes byte values */
         return 2;
-    if (isu8cont(c))
-        return 0;
-    return 1;
+    w = wcwidth(c);
+    if (w == -1)    /* wcwidth() cannot tell: assume one column */
+        return 1;
+    return w;
 }

 static void
 x_zots(char *str)
 {
     int    adj = x_adj_done;
+    int     len;

     if (str > xbuf && isu8cont(*str)) {
         while (str > xbuf && isu8cont(*str))
@@ -694,8 +790,17 @@ x_zots(char *str)
         x_e_putc('\b');
     }
     x_lastcp();
-    while (*str && str < xlp && adj == x_adj_done)
-        x_zotc(*str++);
+    if (str >= xlp && x_edp_len != 0 && adj == x_adj_done) {
+        len = u8mblen(*str);
+        x_zotu8c(str, len);
+        str += len;
+    }
+
+    while (*str && str < xlp && adj == x_adj_done) {
+        len = u8mblen(*str);
+        x_zotu8c(str, len);
+        str += len;
+    }
 }

 static void
@@ -711,6 +816,15 @@ x_zotc(int c)
         x_e_putc(c);
 }

+static void    /* draw the len-byte character that starts at str */
+x_zotu8c(const char *str, int len)
+{
+    if (len == 1)    /* cast: x_zotc() must not be given a negative char */
+        x_zotc((unsigned char)*str);
+    else
+        x_e_putu8c(str, len);
+}
+
 static int
 x_mv_back(int c)
 {
@@ -1032,6 +1146,7 @@ x_redraw(int limit)
 {
     int    i, j, truncate = 0;
     char    *cp;
+    int    len, wc;

     x_adj_ok = 0;
     if (limit == -2) {
@@ -1071,9 +1186,15 @@ x_redraw(int limit)
         limit = xx_cols;
     if (limit >= 0) {
         if (xep > xlp)
-            i = 0;            /* we fill the line */
-            else
-            i = limit - (xlp - xbp);
+            i = x_edp_len;        /* we fill the line */
+        else {
+            i = limit;
+            for (cp = xbp; cp < xlp; cp += len) {
+                wc = u8code(cp);
+                len = u8mblen(*cp);
+                i -= x_size(wc);
+            }
+        }

         for (j = 0; j < i && x_col < (xx_cols - 2); j++)
             x_e_putc(' ');
@@ -1090,8 +1211,12 @@ x_redraw(int limit)
         while (j--)
             x_e_putc('\b');
     }
-    for (cp = xlp; cp > xcp; )
-        x_bs(*--cp);
+    for (cp = xcp; cp < xlp;) {
+        wc = u8code(cp);
+        len = u8mblen(*cp);
+        x_bs(wc);
+        cp += len;
+    }
     x_adj_ok = 1;
 #ifdef DEBUG
     x_flush();
@@ -1102,7 +1227,10 @@ x_redraw(int limit)
 static int
 x_transpose(int c)
 {
-    char    tmp;
+    char    tmpu8[4];
+    int     blen;
+    int     bpos, bbpos;
+    int     i;

     /* What transpose is meant to do seems to be up for debate. This
      * is a general summary of the options; the text is abcd with the
@@ -1128,25 +1256,41 @@ x_transpose(int c)
         /* Gosling/Unipress emacs style: Swap two characters before the
          * cursor, do not change cursor position
          */
-        x_bs(xcp[-1]);
-        x_bs(xcp[-2]);
-        x_zotc(xcp[-1]);
-        x_zotc(xcp[-2]);
-        tmp = xcp[-1];
-        xcp[-1] = xcp[-2];
-        xcp[-2] = tmp;
+        bpos = -1;
+        while (isu8cont(xcp[bpos]))
+            bpos--;
+        bbpos = bpos - 1;
+        while (isu8cont(xcp[bbpos]))
+            bbpos--;
+        x_bs(u8code(xcp + bpos));
+        x_bs(u8code(xcp + bbpos));
+        blen = u8mblen(xcp[bpos]);
+        x_zotu8c(xcp + bpos, blen);
+        i = u8mblen(xcp[bbpos]);
+        x_zotu8c(xcp + bbpos, i);
+        memmove(tmpu8, xcp + bbpos, i);
+        memmove(xcp + bbpos, xcp + bpos, blen);
+        memmove(xcp + bbpos + blen, tmpu8, i);
     } else {
         /* GNU emacs style: Swap the characters before and under the
          * cursor, move cursor position along one.
          */
-        x_bs(xcp[-1]);
-        x_zotc(xcp[0]);
-        x_zotc(xcp[-1]);
-        tmp = xcp[-1];
-        xcp[-1] = xcp[0];
-        xcp[0] = tmp;
-        x_bs(xcp[0]);
-        x_goto(xcp + 1);
+        bpos = -1;
+        while (isu8cont(xcp[bpos]))
+            bpos--;
+        x_bs(u8code(xcp + bpos));
+        blen = u8mblen(xcp[0]);
+        x_zotu8c(xcp, blen);
+        i = u8mblen(xcp[bpos]);
+        x_zotu8c(xcp + bpos, i);
+
+        memmove(tmpu8, xcp + bpos, i);
+        memmove(xcp + bpos, xcp, blen);
+        memmove(xcp + bpos + blen, tmpu8, i);
+
+        if (blen >= i)
+            x_bs(u8code(xcp + bpos + blen));
+        x_goto(xcp + bpos + blen + i);
     }
     return KSTD;
 }
@@ -1802,12 +1946,22 @@ do_complete(int flags,    /* XCF_{COMMAND,F
 static void
 x_adjust(void)
 {
+    int i;
+    int wc;
+
     x_adj_done++;            /* flag the fact that we were called. */
     /*
      * we had a problem if the prompt length > xx_cols / 2
      */
-    if ((xbp = xcp - (x_displen / 2)) < xbuf)
-        xbp = xbuf;
+    for (i = 0, xbp = xcp; xbp > xbuf && i < (x_displen / 2); xbp--) {
+        if (isu8cont(*xbp))
+            continue;
+
+        wc = u8code(xbp);
+        if (i + x_size(wc) >= (x_displen / 2))
+            break;
+        i += x_size(wc);
+    }
     xlp_valid = false;
     x_redraw(xx_cols);
     x_flush();
@@ -1911,6 +2065,24 @@ x_e_putc(int c)
         x_adjust();
 }

+static void
+x_e_putu8c(const char *str, int len)
+{
+    /* Write the len-byte character at str and advance x_col by its width. */
+    int w = x_size(u8code(str));
+    int n;
+
+    /* Emit the bytes only while all columns of the character still fit. */
+    if (x_col + w <= xx_cols) {
+        for (n = 0; n < len; n++)
+            x_putc(str[n]);
+        x_col += w;
+    }
+    /* Ask for a redraw once x_col leaves the usable part of the line. */
+    if (x_adj_ok && (x_col < 0 || x_col + w >= xx_cols - 1))
+        x_adjust();
+}
+
 #ifdef DEBUG
 static int
 x_debug_info(int c)
@@ -1933,9 +2105,16 @@ static void
 x_e_puts(const char *s)
 {
     int    adj = x_adj_done;
+    int    len;

-    while (*s && adj == x_adj_done)
-        x_e_putc(*s++);
+    while (*s && adj == x_adj_done) {
+        len = u8mblen(*s);
+        if (len == 1)
+            x_e_putc(*s);
+        else
+            x_e_putu8c(s, len);
+        s += len;
+    }
 }

 /* NAME:
@@ -2156,10 +2335,22 @@ x_lastcp(void)
 {
     char *rcp;
     int i;
+    int len;
+    int wc;

     if (!xlp_valid) {
-        for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++)
-            i += x_size((unsigned char)*rcp);
+        x_edp_len = 0;
+        for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp += len) {
+            len = u8mblen(*rcp);
+            wc = u8code(rcp);
+
+            if (i + x_size(wc) > x_displen) {
+                x_edp_len = x_displen - i;
+                break;
+            }
+            if (rcp + len <= xep)
+                i += x_size(wc);
+        }
         xlp = rcp;
     }
     xlp_valid = true;
Index: bin/ksh/main.c
===================================================================
RCS file: /cvs/src/bin/ksh/main.c,v
retrieving revision 1.98
diff -u -p -u -r1.98 main.c
--- bin/ksh/main.c    28 Jun 2019 13:34:59 -0000    1.98
+++ bin/ksh/main.c    12 Aug 2022 01:19:52 -0000
@@ -8,6 +8,7 @@

 #include <errno.h>
 #include <fcntl.h>
+#include <locale.h>
 #include <paths.h>
 #include <pwd.h>
 #include <stdio.h>
@@ -145,6 +146,8 @@ main(int argc, char *argv[])
     pid_t ppid;

     kshname = argv[0];
+
+    setlocale(LC_CTYPE, "");

     if (issetugid()) { /* could later drop privileges */
         if (pledge("stdio rpath wpath cpath fattr flock getpw proc "
Index: bin/ksh/vi.c
===================================================================
RCS file: /cvs/src/bin/ksh/vi.c,v
retrieving revision 1.60
diff -u -p -u -r1.60 vi.c
--- bin/ksh/vi.c    12 Mar 2021 02:10:25 -0000    1.60
+++ bin/ksh/vi.c    12 Aug 2022 01:19:53 -0000
@@ -14,6 +14,7 @@
 #include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 #ifndef SMALL
 # include <term.h>
 # include <curses.h>
@@ -73,8 +74,14 @@ static void    x_vi_zotc(int);
 static void    vi_pprompt(int);
 static void    vi_error(void);
 static void    vi_macro_reset(void);
+static void    x_vi_ungetc(int);
+static int      x_vi_getc(void);
+static int      x_vi_getu8(char, char *, int);
 static int    x_vi_putbuf(const char *, size_t);
 static int    isu8cont(unsigned char);
+static int      u8code(const char *);
+static int      u8mblen(unsigned char);
+static int      u8width(int);

 #define C_    0x1        /* a valid command that isn't a M_, E_, U_ */
 #define M_    0x2        /* movement command (h, l, etc.) */
@@ -213,10 +220,10 @@ x_vi(char *buf, size_t len)
                     continue;
                 /* must be the end of all the macros */
                 vi_macro_reset();
-                c = x_getc();
+                c = x_vi_getc();
             }
         } else
-            c = x_getc();
+            c = x_vi_getc();

         if (c == -1)
             break;
@@ -260,6 +267,8 @@ vi_hook(int ch)
 {
     static char    curcmd[MAXVICMD], locpat[SRCHLEN];
     static int    cmdlen, argc1, argc2;
+    int        i, len;
+    char        u8c[4];

     switch (state) {

@@ -314,8 +323,11 @@ vi_hook(int ch)
         if (is_bad(ch)) {
             del_range(es->cursor, es->cursor + 1);
             vi_error();
-        } else
-            es->cbuf[es->cursor++] = ch;
+        } else {
+            len = x_vi_getu8(ch, es->cbuf, es->cursor);
+            es->cursor += len;
+            es->linelen += len - 1;
+        }
         refresh_line(1);
         state = VNORMAL;
         break;
@@ -412,7 +424,6 @@ vi_hook(int ch)
             return 0;
         } else if (ch == edchars.werase) {
             struct edstate new_es, *save_es;
-            int i;
             int n = srchlen;

             new_es.cursor = n;
@@ -430,10 +441,16 @@ vi_hook(int ch)
             refresh_line(0);
             return 0;
         } else {
-            if (srchlen == SRCHLEN - 1)
+            if (srchlen >= SRCHLEN - 1)
                 vi_error();
             else {
-                locpat[srchlen++] = ch;
+                len = x_vi_getu8(ch, u8c, 0);
+                for (i = 0; i < len; i++) {
+                    locpat[srchlen++] = u8c[i];
+                    if (srchlen >= SRCHLEN - 1)
+                        vi_error();
+                }
+
                 if ((ch & 0x80) && Flag(FVISHOW8)) {
                     if (es->linelen + 2 > es->cbufsize)
                         vi_error();
@@ -449,7 +466,8 @@ vi_hook(int ch)
                 } else {
                     if (es->linelen >= es->cbufsize)
                         vi_error();
-                    es->cbuf[es->linelen++] = ch;
+                    for (i = 0; i < len; i++)
+                        es->cbuf[es->linelen++] = u8c[i];
                 }
                 es->cursor = es->linelen;
                 refresh_line(0);
@@ -537,7 +555,9 @@ vi_reset(char *buf, size_t len)
 static int
 nextstate(int ch)
 {
-    if (is_extend(ch))
+    if (ch & 0x80)
+        return VFAIL;
+    else if (is_extend(ch))
         return VEXTCMD;
     else if (is_srch(ch))
         return VSEARCH;
@@ -555,6 +575,9 @@ static int
 vi_insert(int ch)
 {
     int    tcursor;
+    int    i, len;
+    char    u8c[4];
+

     if (ch == edchars.erase || ch == CTRL('h')) {
         if (insert == REPLACE) {
@@ -674,17 +697,21 @@ vi_insert(int ch)
     /* End nonstandard vi commands } */

     default:
-        if (es->linelen >= es->cbufsize - 1)
+        len = x_vi_getu8(ch, u8c, 0);
+
+        if (es->linelen >= es->cbufsize - len)
             return -1;
-        ibuf[inslen++] = ch;
+        for (i = 0; i < len; i++)
+            ibuf[inslen++] = u8c[i];
         if (insert == INSERT) {
-            memmove(&es->cbuf[es->cursor+1], &es->cbuf[es->cursor],
+            memmove(&es->cbuf[es->cursor+len], &es->cbuf[es->cursor],
                 es->linelen - es->cursor);
-            es->linelen++;
+            es->linelen += len;
         }
-        es->cbuf[es->cursor++] = ch;
+        for (i = 0; i < len; i++)
+            es->cbuf[es->cursor++] = u8c[i];
         if (insert == REPLACE && es->cursor > es->linelen)
-            es->linelen++;
+            es->linelen += len;
         expanded = NONE;
     }
     return 0;
@@ -696,6 +723,8 @@ vi_cmd(int argcnt, const char *cmd)
     int        ncursor;
     int        cur, c1, c2, c3 = 0;
     int        any;
+    int        len;
+    char        u8c[4];
     struct edstate    *t;

     if (argcnt == 0 && !is_zerocount(*cmd))
@@ -769,7 +798,7 @@ vi_cmd(int argcnt, const char *cmd)
         case 'a':
             modified = 1; hnum = hlast;
             if (es->linelen != 0)
-                while (isu8cont(es->cbuf[++es->cursor]))
+                while (isu8cont(es->cbuf[++es->cursor]) && es->cursor < es->linelen) /* keep the cursor from moving past linelen */
                     continue;
             insert = INSERT;
             break;
@@ -835,11 +864,11 @@ vi_cmd(int argcnt, const char *cmd)
         case 'p':
             modified = 1; hnum = hlast;
             if (es->linelen != 0)
-                es->cursor++;
+                es->cursor += u8mblen(es->cbuf[es->cursor]);
             while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0)
                 ;
-            if (es->cursor != 0)
-                es->cursor--;
+            while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+                ;
             if (argcnt != 0)
                 return -1;
             break;
@@ -849,8 +878,8 @@ vi_cmd(int argcnt, const char *cmd)
             any = 0;
             while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0)
                 any = 1;
-            if (any && es->cursor != 0)
-                es->cursor--;
+            while (any && es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+                ;
             if (argcnt != 0)
                 return -1;
             break;
@@ -864,8 +893,8 @@ vi_cmd(int argcnt, const char *cmd)
         case 'D':
             yank_range(es->cursor, es->linelen);
             del_range(es->cursor, es->linelen);
-            if (es->cursor != 0)
-                es->cursor--;
+            while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+                ;
             break;

         case 'g':
@@ -937,8 +966,9 @@ vi_cmd(int argcnt, const char *cmd)
                     return -1;

                 del_range(es->cursor, cur);
+                len = x_vi_getu8(cmd[1], u8c, 0);
                 while (argcnt-- > 0)
-                    putbuf(&cmd[1], 1, 0);
+                    putbuf(u8c, len, 0);
                 while (es->cursor > 0)
                     if (!isu8cont(es->cbuf[--es->cursor]))
                         break;
@@ -1090,7 +1120,7 @@ vi_cmd(int argcnt, const char *cmd)
             }
             modified = 1; hnum = hlast;
             if (es->cursor != es->linelen)
-                es->cursor++;
+                es->cursor += u8mblen(es->cbuf[es->cursor]);
             while (*p && !issp(*p)) {
                 argcnt++;
                 p++;
@@ -1100,8 +1130,8 @@ vi_cmd(int argcnt, const char *cmd)
             else if (putbuf(sp, argcnt, 0) != 0)
                 argcnt = -1;
             if (argcnt < 0) {
-                if (es->cursor != 0)
-                    es->cursor--;
+                while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+                    ;
                 return -1;
             }
             insert = INSERT;
@@ -1125,8 +1155,8 @@ vi_cmd(int argcnt, const char *cmd)
                     modified = 1; hnum = hlast;
                     *p = tolower(c);
                 }
-                if (es->cursor < es->linelen - 1)
-                    es->cursor++;
+                if (es->cursor < es->linelen - u8mblen(es->cbuf[es->cursor]))
+                    es->cursor += u8mblen(es->cbuf[es->cursor]);
             }
             break;
             }
@@ -1218,7 +1248,7 @@ domove(int argcnt, const char *cmd, int
         if ((ncursor = findch(fsavech, argcnt, t, i)) < 0)
             return -1;
         if (sub && t)
-            ncursor++;
+            ncursor += u8mblen(es->cbuf[ncursor]);
         break;

     case 'h':
@@ -1277,16 +1307,16 @@ domove(int argcnt, const char *cmd, int
         ncursor = es->cursor;
         while (ncursor < es->linelen &&
             (i = bracktype(es->cbuf[ncursor])) == 0)
-            ncursor++;
+            ncursor += u8mblen(es->cbuf[ncursor]);
         if (ncursor == es->linelen)
             return -1;
         bcount = 1;
         do {
             if (i > 0) {
-                if (++ncursor >= es->linelen)
+                if ((ncursor += u8mblen(es->cbuf[ncursor])) >= es->linelen)
                     return -1;
             } else {
-                if (--ncursor < 0)
+                if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0)
                     return -1;
             }
             t = bracktype(es->cbuf[ncursor]);
@@ -1296,7 +1326,7 @@ domove(int argcnt, const char *cmd, int
                 bcount--;
         } while (bcount != 0);
         if (sub && i > 0)
-            ncursor++;
+            ncursor += u8mblen(es->cbuf[ncursor]);
         break;

     default:
@@ -1444,8 +1474,8 @@ edit_reset(char *buf, size_t len)
         pwidth -= prompt_trunc;
     } else
         prompt_trunc = 0;
-    if (!wbuf_len || wbuf_len != x_cols - 3) {
-        wbuf_len = x_cols - 3;
+    if (!wbuf_len || wbuf_len != (x_cols - 3) * 4) { /* UTF-8 uses at most 4 bytes per character. */
+        wbuf_len = (x_cols - 3) * 4;
         wbuf[0] = aresize(wbuf[0], wbuf_len, APERM);
         wbuf[1] = aresize(wbuf[1], wbuf_len, APERM);
     }
@@ -1457,6 +1487,70 @@ edit_reset(char *buf, size_t len)
     holdlen = 0;
 }

+static int unget_char = -1;    /* one-character pushback slot; -1 means empty */
+
+static void
+x_vi_ungetc(int c)
+{
+    unget_char = c;    /* handed back by the next x_vi_getc() call */
+}
+
+static int
+x_vi_getc(void)
+{
+    /* Return the pushed-back character if there is one, else read input. */
+    int pending = unget_char;
+
+    if (pending == -1)
+        return x_getc();
+
+    /* Empty the pushback slot before handing the character out. */
+    unget_char = -1;
+    return pending;
+}
+
+static int
+x_vi_getu8(char firstch, char *buf, int off)
+{
+    /*
+     * Collect the UTF-8 character that starts with firstch, reading any
+     * continuation bytes with x_vi_getc(), and store it at buf + off.
+     * Returns the number of bytes stored, or 0 when firstch is itself a
+     * continuation byte or the sequence is cut short or malformed.
+     */
+    int c, cc, i, len;
+    char u8c[4];
+
+    c = (unsigned char)firstch;    /* plain char may be negative */
+    if (isu8cont(c))
+        return 0;
+    u8c[0] = c;
+
+    if ((c & 0xf8) == 0xf0 && c < 0xf5)
+        len = 4;
+    else if ((c & 0xf0) == 0xe0)
+        len = 3;
+    else if ((c & 0xe0) == 0xc0 && c > 0xc1)
+        len = 2;
+    else
+        len = 1;
+
+    for (i = 1; i < len; i++) {
+        if ((cc = x_vi_getc()) == -1)
+            return 0;
+        /* Reject non-continuations, overlongs, surrogates, > U+10FFFF. */
+        if (isu8cont(cc) == 0 || (i == 1 &&
+            ((c == 0xe0 && cc < 0xa0) || (c == 0xed && cc & 0x20) ||
+            (c == 0xf0 && cc < 0x90) || (c == 0xf4 && cc & 0x30)))) {
+            x_vi_ungetc(cc);
+            return 0;
+        }
+        u8c[i] = cc;
+    }
+    memcpy(buf + off, u8c, len);
+    return len;
+}
+
 /*
  * this is used for calling x_escape() in complete_word()
  */
@@ -1500,26 +1594,29 @@ static int
 findch(int ch, int cnt, int forw, int incl)
 {
     int    ncursor;
+    char    u8c[4];
+    int     len;

     if (es->linelen == 0)
         return -1;
     ncursor = es->cursor;
+    len = x_vi_getu8(ch, u8c, 0);
     while (cnt--) {
         do {
             if (forw) {
-                if (++ncursor == es->linelen)
+                if ((ncursor += u8mblen(es->cbuf[ncursor])) == es->linelen)
                     return -1;
             } else {
-                if (--ncursor < 0)
+                if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0)
                     return -1;
             }
-        } while (es->cbuf[ncursor] != ch);
+        } while (memcmp(es->cbuf + ncursor, u8c, len));
     }
     if (!incl) {
         if (forw)
-            ncursor--;
+            while (ncursor > 0 && isu8cont(es->cbuf[--ncursor]));
         else
-            ncursor++;
+            ncursor += u8mblen(es->cbuf[ncursor]);
     }
     return ncursor;
 }
@@ -1774,13 +1871,17 @@ static int
 outofwin(void)
 {
     int    cur, col;
+    int    wc;

     if (es->cursor < es->winleft)
         return 1;
     col = 0;
     cur = es->winleft;
-    while (cur < es->cursor)
-        col = newcol((unsigned char) es->cbuf[cur++], col);
+    while (cur < es->cursor) {
+        wc = u8code(&es->cbuf[cur]);
+        col = newcol(wc, col);
+        cur += u8mblen(es->cbuf[cur]);
+    }
     if (col >= winwidth)
         return 1;
     return 0;
@@ -1792,6 +1893,7 @@ rewindow(void)
     int    tcur, tcol;
     int    holdcur1, holdcol1;
     int    holdcur2, holdcol2;
+    int    wc;

     holdcur1 = holdcur2 = tcur = 0;
     holdcol1 = holdcol2 = tcol = 0;
@@ -1802,11 +1904,15 @@ rewindow(void)
             holdcur2 = tcur;
             holdcol2 = tcol;
         }
-        tcol = newcol((unsigned char) es->cbuf[tcur++], tcol);
+        wc = u8code(&es->cbuf[tcur]);
+        tcol = newcol(wc, tcol);
+        tcur += u8mblen(es->cbuf[tcur]);
+    }
+    while (tcol - holdcol1 > winwidth / 2) {
+        wc = u8code(&es->cbuf[holdcur1]);
+        holdcol1 = newcol(wc, holdcol1);
+        holdcur1 += u8mblen(es->cbuf[holdcur1]);
     }
-    while (tcol - holdcol1 > winwidth / 2)
-        holdcol1 = newcol((unsigned char) es->cbuf[holdcur1++],
-            holdcol1);
     es->winleft = holdcur1;
 }

@@ -1814,11 +1920,7 @@ rewindow(void)
 static int
 newcol(int ch, int col)
 {
-    if (ch == '\t')
-        return (col | 7) + 1;
-    if (isu8cont(ch))
-        return col;
-    return col + char_len(ch);
+    return col + u8width(ch);    /* NOTE(review): tabs no longer advance to the next tab stop and char_len() is bypassed here — confirm callers cope */
 }

 /* Display wb1 assuming that wb2 is currently displayed. */
@@ -1827,7 +1929,6 @@ display(char *wb1, char *wb2, int leftsi
 {
     char    *twb1;    /* pointer into the buffer to display */
     char    *twb2;    /* pointer into the previous display buffer */
-    static int lastb = -1; /* last byte# written from wb1, if UTF-8 */
     int     cur;    /* byte# in the main command line buffer */
     int     col;    /* display column loop variable */
     int     ncol;    /* display column of the cursor */
@@ -1835,6 +1936,9 @@ display(char *wb1, char *wb2, int leftsi
     int     moreright;
     char     mc;    /* new "more character" at the right of window */
     unsigned char ch;
+    int      wc1, wc2, w1col, w2col;
+    int      i, len, wc, u8w;
+

     /*
      * Fill the current display buffer with data from cbuf.
@@ -1846,6 +1950,7 @@ display(char *wb1, char *wb2, int leftsi
     moreright = 0;
     twb1 = wb1;
     while (col < winwidth && cur < es->linelen) {
+        u8w = 1;
         if (cur == es->cursor && leftside)
             ncol = col + pwidth;
         if ((ch = es->cbuf[cur]) == '\t') {
@@ -1869,15 +1974,21 @@ display(char *wb1, char *wb2, int leftsi
                         col++;
                     }
                 } else {
-                    *twb1++ = ch;
-                    if (!isu8cont(ch))
-                        col++;
+                    len = u8mblen(ch);
+                    for (i = 0; i < len; i++)
+                        *twb1++ = es->cbuf[cur + i];
+                    if (len > 1)
+                        wc = u8code(&es->cbuf[cur]);
+                    else
+                        wc = ch;
+                    u8w = u8width(wc);
+                    col += u8w;
                 }
             }
         }
         if (cur == es->cursor && !leftside)
-            ncol = col + pwidth - 1;
-        cur++;
+            ncol = col + pwidth - u8w;
+        cur += u8mblen(ch);
     }
     if (cur == es->cursor)
         ncol = col + pwidth;
@@ -1900,24 +2011,13 @@ display(char *wb1, char *wb2, int leftsi

     col = pwidth;
     cnt = winwidth;
-    for (twb1 = wb1, twb2 = wb2; cnt; twb1++, twb2++) {
-        if (*twb1 != *twb2) {
-
-            /*
-             * When a byte changes in the middle of a UTF-8
-             * character, back up to the start byte, unless
-             * the previous byte was the last one written.
-             */
-
-            if (col > 0 && isu8cont(*twb1)) {
-                col--;
-                if (lastb >= 0 && twb1 == wb1 + lastb + 1)
-                    cur_col = col;
-                else while (twb1 > wb1 && isu8cont(*twb1)) {
-                    twb1--;
-                    twb2--;
-                }
-            }
+    w1col = 0;
+    w2col = 0;
+    for (twb1 = wb1, twb2 = wb2; cnt > 0;) {
+        wc1 = u8code(twb1);
+        wc2 = u8code(twb2);
+        u8w = u8width(wc1);
+        if (wc1 != wc2 || w1col != w2col) {

             if (cur_col != col)
                 ed_mov_opt(col, wb1);
@@ -1926,16 +2026,11 @@ display(char *wb1, char *wb2, int leftsi
              * Always write complete characters, and
              * advance all pointers accordingly.
              */
-
-            x_putc(*twb1);
-            while (isu8cont(twb1[1])) {
-                x_putc(*++twb1);
-                twb2++;
-            }
-            lastb = *twb1 & 0x80 ? twb1 - wb1 : -1;
-            cur_col++;
-        } else if (isu8cont(*twb1))
-            continue;
+            cur_col += u8w;
+            len = u8mblen(*twb1);
+            for (i = 0; i < len; i++)
+                x_putc(twb1[i]);
+        }

         /*
          * For changed continuation bytes, we backed up.
@@ -1943,8 +2038,16 @@ display(char *wb1, char *wb2, int leftsi
          * So, getting here, we had a real column.
          */

-        col++;
-        cnt--;
+        col += u8w;
+        cnt -= u8w;
+        w1col += u8w;
+
+        twb1 += u8mblen(*twb1);
+        while (w1col >= w2col + u8width(wc2)) {
+            w2col += u8width(wc2);
+            twb2 += u8mblen(*twb2);
+            wc2 = u8code(twb2);
+        }
     }

     /* Update the "more character". */
@@ -1965,15 +2068,12 @@ display(char *wb1, char *wb2, int leftsi
         x_putc(mc);
         cur_col++;
         morec = mc;
-        lastb = -1;
     }

     /* Move the cursor to its new position. */

-    if (cur_col != ncol) {
+    if (cur_col != ncol)
         ed_mov_opt(ncol, wb1);
-        lastb = -1;
-    }
 }

 /* Move the display cursor to display column number col. */
@@ -1981,6 +2081,7 @@ static void
 ed_mov_opt(int col, char *wb)
 {
     int ci;
+    int wc;

     /* The cursor is already at the right place. */

@@ -2006,10 +2107,14 @@ ed_mov_opt(int col, char *wb)

     /* Advance the cursor. */

-    for (ci = pwidth; ci < col || isu8cont(*wb);
-         ci = newcol((unsigned char)*wb++, ci))
+    for (ci = pwidth; ci < col || isu8cont(*wb); wb++) {
         if (ci > cur_col || (ci == cur_col && !isu8cont(*wb)))
             x_putc(*wb);
+        if (!isu8cont(*wb)) {
+            wc = u8code(wb);
+            ci = newcol(wc, ci);
+        }
+    }
     cur_col = ci;
 }

@@ -2257,5 +2362,70 @@ static int
 isu8cont(unsigned char c)
 {
     return !Flag(FVISHOW8) && (c & (0x80 | 0x40)) == 0x80;
+}
+
+static int
+u8code(const char *str)
+{
+    /*
+     * Decode the UTF-8 sequence at str into a code point.  When
+     * Flag(FVISHOW8) is set every byte stands for itself.  An invalid
+     * lead byte, or a lead byte whose sequence is cut short (for example
+     * by a terminating NUL), is also returned as-is, so decoding never
+     * reads beyond the first byte that does not belong to the sequence.
+     */
+    const unsigned char *p = (const unsigned char *)str;
+    int wc, i, len;
+
+    if (Flag(FVISHOW8))
+        return *p;
+    if ((*p & 0xf8) == 0xf0 && *p < 0xf5) {
+        wc = *p & 0x07;
+        len = 4;
+    } else if ((*p & 0xf0) == 0xe0) {
+        wc = *p & 0x0f;
+        len = 3;
+    } else if ((*p & 0xe0) == 0xc0 && *p > 0xc1) {
+        wc = *p & 0x1f;
+        len = 2;
+    } else
+        return *p;
+    for (i = 1; i < len; i++) {
+        if ((p[i] & 0xc0) != 0x80)
+            return *p;    /* truncated or malformed sequence */
+        wc = (wc << 6) | (p[i] & 0x3f);
+    }
+    return wc;
+}
+
+static int
+u8mblen(unsigned char c)
+{
+    /*
+     * Byte length of the UTF-8 sequence announced by lead byte c.
+     * With Flag(FVISHOW8) set, and for continuation or invalid lead
+     * bytes, every byte counts as a character of its own.
+     */
+    if (Flag(FVISHOW8))
+        return 1;
+
+    if ((c & 0xf8) == 0xf0 && c < 0xf5)
+        return 4;
+    if ((c & 0xf0) == 0xe0)
+        return 3;
+    if ((c & 0xe0) == 0xc0 && c > 0xc1)
+        return 2;
+    return 1;
+}
+
+static int
+u8width(int wc)
+{
+    /* Columns taken by wc; one column when wcwidth() reports -1. */
+    int cols = wcwidth(wc);
+
+    if (cols != -1)
+        return cols;
+    return 1;
 }
 #endif    /* VI */

Reply via email to