Hello, I have added support for UTF-8 editing in the emacs and vi editing modes of ksh, and fixed some UTF-8 encoding issues.
it is good? Index: bin/ksh/emacs.c =================================================================== RCS file: /cvs/src/bin/ksh/emacs.c,v retrieving revision 1.89 diff -u -p -u -r1.89 emacs.c --- bin/ksh/emacs.c 9 Oct 2021 21:38:00 -0000 1.89 +++ bin/ksh/emacs.c 12 Aug 2022 01:19:52 -0000 @@ -21,6 +21,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> #ifndef SMALL # include <term.h> # include <curses.h> @@ -102,6 +103,7 @@ static int x_adj_done; static int xx_cols; static int x_col; static int x_displen; +static int x_edp_len; static int x_arg; /* general purpose arg */ static int x_arg_defaulted;/* x_arg not explicitly set; defaulted to 1 */ @@ -133,6 +135,7 @@ static int x_size_str(char *); static int x_size(int); static void x_zots(char *); static void x_zotc(int); +static void x_zotu8c(const char *, int); static void x_load_hist(char **); static int x_search(char *, int, int); static int x_match(char *, char *); @@ -143,12 +146,15 @@ static void x_e_ungetc(int); static int x_e_getc(void); static int x_e_getu8(char *, int); static void x_e_putc(int); +static void x_e_putu8c(const char *, int); static void x_e_puts(const char *); static int x_comment(int); static int x_fold_case(int); static char *x_lastcp(void); static void do_complete(int, Comp_type); static int isu8cont(unsigned char); +static int u8code(const char *); +static int u8mblen(unsigned char); /* proto's for keybindings */ static int x_abort(int); @@ -276,6 +282,54 @@ isu8cont(unsigned char c) } int +u8code(const char *str) +{ + int wc, i, len; + unsigned char c; + + wc = 0; + c = *str++; + + if ((c & 0xf8) == 0xf0 && c < 0xf5) { + wc = c & 0x07; + len = 4; + } else if ((c & 0xf0) == 0xe0) { + wc = c & 0x0f; + len = 3; + } else if ((c & 0xe0) == 0xc0 && c > 0xc1) { + wc = c & 0x1f; + len = 2; + } else { + len = 1; + wc = c & 0xff; + } + + for (i = 1; i < len; i++) { + c = *str++; + wc = (wc << 6) | (c & 0x3f); + } + + return wc; +} + +int +u8mblen(unsigned char c) 
+{ + int len; + + if ((c & 0xf8) == 0xf0 && c < 0xf5) + len = 4; + else if ((c & 0xf0) == 0xe0) + len = 3; + else if ((c & 0xe0) == 0xc0 && c > 0xc1) + len = 2; + else + len = 1; + + return len; +} + +int x_emacs(char *buf, size_t len) { struct kb_entry *k, *kmatch = NULL; @@ -449,6 +503,7 @@ x_ins(char *s) { char *cp = xcp; int adj = x_adj_done; + int wc; if (x_do_ins(s, strlen(s)) < 0) return -1; @@ -462,8 +517,12 @@ x_ins(char *s) x_zots(cp); if (adj == x_adj_done) { /* has x_adjust() been called? */ /* no */ - for (cp = xlp; cp > xcp; ) - x_bs(*--cp); + for (cp = xlp; cp > xcp; ) { + if (isu8cont(*--cp)) + continue; + wc = u8code(cp); + x_bs(wc); + } } x_adj_ok = 1; @@ -511,6 +570,7 @@ x_delete(int nc, int push) { int i,j; char *cp; + int len, wc; if (nc == 0) return; @@ -531,8 +591,12 @@ x_delete(int nc, int push) cp = xcp; j = 0; i = nc; - while (i--) { - j += x_size((unsigned char)*cp++); + while (i) { + wc = u8code(cp); + len = u8mblen(*cp); + j += x_size(wc); + cp += len; + i -= len; } memmove(xcp, xcp+nc, xep - xcp + 1); /* Copies the null */ x_adj_ok = 0; /* don't redraw */ @@ -555,8 +619,12 @@ x_delete(int nc, int push) /*x_goto(xcp);*/ x_adj_ok = 1; xlp_valid = false; - for (cp = x_lastcp(); cp > xcp; ) - x_bs(*--cp); + for (cp = x_lastcp(); cp > xcp; ) { + if (isu8cont(*--cp)) + continue; + wc = u8code(cp); + x_bs(wc); + } return; } @@ -639,16 +707,34 @@ x_fword(void) static void x_goto(char *cp) { - if (cp < xbp || cp >= (xbp + x_displen)) { + char *ecp; + int i, len, wc; + + for (i = 0, ecp = xbp; ecp < xep && i < x_displen; ecp += len) { + wc = u8code(ecp); + len = u8mblen(*ecp); + if (i + x_size(wc) >= x_displen) + break; + i += x_size(wc); + } + + if (cp < xbp || cp >= ecp) { /* we are heading off screen */ xcp = cp; x_adjust(); } else if (cp < xcp) { /* move back */ - while (cp < xcp) - x_bs((unsigned char)*--xcp); + while (cp < xcp) { + if (isu8cont(*--xcp)) + continue; + wc = u8code(xcp); + x_bs(wc); + } } else if (cp > xcp) { /* move forward 
*/ - while (cp > xcp) - x_zotc((unsigned char)*xcp++); + while (cp > xcp) { + len = u8mblen(*xcp); + x_zotu8c(xcp, len); + xcp += len; + } } } @@ -666,27 +752,37 @@ static int x_size_str(char *cp) { int size = 0; - while (*cp) - size += x_size(*cp++); + int len, wc; + while (*cp) { + wc = u8code(cp); + len = u8mblen(*cp); + size += x_size(wc); + cp += len; + } return size; } static int x_size(int c) { + int w; + if (c=='\b') + return -1; if (c=='\t') return 4; /* Kludge, tabs are always four spaces. */ - if (iscntrl(c)) /* control char */ + if (c < 255 && iscntrl(c)) /* control char */ return 2; - if (isu8cont(c)) - return 0; - return 1; + w = wcwidth(c); + if (w == -1) + return 1; + return w; } static void x_zots(char *str) { int adj = x_adj_done; + int len; if (str > xbuf && isu8cont(*str)) { while (str > xbuf && isu8cont(*str)) @@ -694,8 +790,17 @@ x_zots(char *str) x_e_putc('\b'); } x_lastcp(); - while (*str && str < xlp && adj == x_adj_done) - x_zotc(*str++); + if (str >= xlp && x_edp_len != 0 && adj == x_adj_done) { + len = u8mblen(*str); + x_zotu8c(str, len); + str += len; + } + + while (*str && str < xlp && adj == x_adj_done) { + len = u8mblen(*str); + x_zotu8c(str, len); + str += len; + } } static void @@ -711,6 +816,15 @@ x_zotc(int c) x_e_putc(c); } +static void +x_zotu8c(const char *str, int len) +{ + if (len == 1) + return x_zotc(*str); + else + return x_e_putu8c(str, len); +} + static int x_mv_back(int c) { @@ -1032,6 +1146,7 @@ x_redraw(int limit) { int i, j, truncate = 0; char *cp; + int len, wc; x_adj_ok = 0; if (limit == -2) { @@ -1071,9 +1186,15 @@ x_redraw(int limit) limit = xx_cols; if (limit >= 0) { if (xep > xlp) - i = 0; /* we fill the line */ - else - i = limit - (xlp - xbp); + i = x_edp_len; /* we fill the line */ + else { + i = limit; + for (cp = xbp; cp < xlp; cp += len) { + wc = u8code(cp); + len = u8mblen(*cp); + i -= x_size(wc); + } + } for (j = 0; j < i && x_col < (xx_cols - 2); j++) x_e_putc(' '); @@ -1090,8 +1211,12 @@ x_redraw(int 
limit) while (j--) x_e_putc('\b'); } - for (cp = xlp; cp > xcp; ) - x_bs(*--cp); + for (cp = xcp; cp < xlp;) { + wc = u8code(cp); + len = u8mblen(*cp); + x_bs(wc); + cp += len; + } x_adj_ok = 1; #ifdef DEBUG x_flush(); @@ -1102,7 +1227,10 @@ x_redraw(int limit) static int x_transpose(int c) { - char tmp; + char tmpu8[4]; + int blen; + int bpos, bbpos; + int i; /* What transpose is meant to do seems to be up for debate. This * is a general summary of the options; the text is abcd with the @@ -1128,25 +1256,41 @@ x_transpose(int c) /* Gosling/Unipress emacs style: Swap two characters before the * cursor, do not change cursor position */ - x_bs(xcp[-1]); - x_bs(xcp[-2]); - x_zotc(xcp[-1]); - x_zotc(xcp[-2]); - tmp = xcp[-1]; - xcp[-1] = xcp[-2]; - xcp[-2] = tmp; + bpos = -1; + while (isu8cont(xcp[bpos])) + bpos--; + bbpos = bpos - 1; + while (isu8cont(xcp[bbpos])) + bbpos--; + x_bs(u8code(xcp + bpos)); + x_bs(u8code(xcp + bbpos)); + blen = u8mblen(xcp[bpos]); + x_zotu8c(xcp + bpos, blen); + i = u8mblen(xcp[bbpos]); + x_zotu8c(xcp + bbpos, i); + memmove(tmpu8, xcp + bbpos, i); + memmove(xcp + bbpos, xcp + bpos, blen); + memmove(xcp + bbpos + blen, tmpu8, i); } else { /* GNU emacs style: Swap the characters before and under the * cursor, move cursor position along one. */ - x_bs(xcp[-1]); - x_zotc(xcp[0]); - x_zotc(xcp[-1]); - tmp = xcp[-1]; - xcp[-1] = xcp[0]; - xcp[0] = tmp; - x_bs(xcp[0]); - x_goto(xcp + 1); + bpos = -1; + while (isu8cont(xcp[bpos])) + bpos--; + x_bs(u8code(xcp + bpos)); + blen = u8mblen(xcp[0]); + x_zotu8c(xcp, blen); + i = u8mblen(xcp[bpos]); + x_zotu8c(xcp + bpos, i); + + memmove(tmpu8, xcp + bpos, i); + memmove(xcp + bpos, xcp, blen); + memmove(xcp + bpos + blen, tmpu8, i); + + if (blen >= i) + x_bs(u8code(xcp + bpos + blen)); + x_goto(xcp + bpos + blen + i); } return KSTD; } @@ -1802,12 +1946,22 @@ do_complete(int flags, /* XCF_{COMMAND,F static void x_adjust(void) { + int i; + int wc; + x_adj_done++; /* flag the fact that we were called. 
*/ /* * we had a problem if the prompt length > xx_cols / 2 */ - if ((xbp = xcp - (x_displen / 2)) < xbuf) - xbp = xbuf; + for (i = 0, xbp = xcp; xbp > xbuf && i < (x_displen / 2); xbp--) { + if (isu8cont(*xbp)) + continue; + + wc = u8code(xbp); + if (i + x_size(wc) >= (x_displen / 2)) + break; + i += x_size(wc); + } xlp_valid = false; x_redraw(xx_cols); x_flush(); @@ -1911,6 +2065,24 @@ x_e_putc(int c) x_adjust(); } +static void +x_e_putu8c(const char *str, int len) { + int i; + int wc, w; + + wc = u8code(str); + w = x_size(wc); + if (x_col < xx_cols - w + 1) { + for (i = 0; i < len; i++) { + x_putc(str[i]); + } + x_col += w; + } + + if (x_adj_ok && (x_col < 0 || x_col >= (xx_cols - 2 - w + 1))) + x_adjust(); +} + #ifdef DEBUG static int x_debug_info(int c) @@ -1933,9 +2105,16 @@ static void x_e_puts(const char *s) { int adj = x_adj_done; + int len; - while (*s && adj == x_adj_done) - x_e_putc(*s++); + while (*s && adj == x_adj_done) { + len = u8mblen(*s); + if (len == 1) + x_e_putc(*s); + else + x_e_putu8c(s, len); + s += len; + } } /* NAME: @@ -2156,10 +2335,22 @@ x_lastcp(void) { char *rcp; int i; + int len; + int wc; if (!xlp_valid) { - for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++) - i += x_size((unsigned char)*rcp); + x_edp_len = 0; + for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp += len) { + len = u8mblen(*rcp); + wc = u8code(rcp); + + if (i + x_size(wc) > x_displen) { + x_edp_len = x_displen - i; + break; + } + if (rcp + len <= xep) + i += x_size(wc); + } xlp = rcp; } xlp_valid = true; Index: bin/ksh/main.c =================================================================== RCS file: /cvs/src/bin/ksh/main.c,v retrieving revision 1.98 diff -u -p -u -r1.98 main.c --- bin/ksh/main.c 28 Jun 2019 13:34:59 -0000 1.98 +++ bin/ksh/main.c 12 Aug 2022 01:19:52 -0000 @@ -8,6 +8,7 @@ #include <errno.h> #include <fcntl.h> +#include <locale.h> #include <paths.h> #include <pwd.h> #include <stdio.h> @@ -145,6 +146,8 @@ main(int argc, char *argv[]) pid_t 
ppid; kshname = argv[0]; + + setlocale(LC_CTYPE, ""); if (issetugid()) { /* could later drop privileges */ if (pledge("stdio rpath wpath cpath fattr flock getpw proc " Index: bin/ksh/vi.c =================================================================== RCS file: /cvs/src/bin/ksh/vi.c,v retrieving revision 1.60 diff -u -p -u -r1.60 vi.c --- bin/ksh/vi.c 12 Mar 2021 02:10:25 -0000 1.60 +++ bin/ksh/vi.c 12 Aug 2022 01:19:53 -0000 @@ -14,6 +14,7 @@ #include <ctype.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> #ifndef SMALL # include <term.h> # include <curses.h> @@ -73,8 +74,14 @@ static void x_vi_zotc(int); static void vi_pprompt(int); static void vi_error(void); static void vi_macro_reset(void); +static void x_vi_ungetc(int); +static int x_vi_getc(void); +static int x_vi_getu8(char, char *, int); static int x_vi_putbuf(const char *, size_t); static int isu8cont(unsigned char); +static int u8code(const char *); +static int u8mblen(unsigned char); +static int u8width(int); #define C_ 0x1 /* a valid command that isn't a M_, E_, U_ */ #define M_ 0x2 /* movement command (h, l, etc.) 
*/ @@ -213,10 +220,10 @@ x_vi(char *buf, size_t len) continue; /* must be the end of all the macros */ vi_macro_reset(); - c = x_getc(); + c = x_vi_getc(); } } else - c = x_getc(); + c = x_vi_getc(); if (c == -1) break; @@ -260,6 +267,8 @@ vi_hook(int ch) { static char curcmd[MAXVICMD], locpat[SRCHLEN]; static int cmdlen, argc1, argc2; + int i, len; + char u8c[4]; switch (state) { @@ -314,8 +323,11 @@ vi_hook(int ch) if (is_bad(ch)) { del_range(es->cursor, es->cursor + 1); vi_error(); - } else - es->cbuf[es->cursor++] = ch; + } else { + len = x_vi_getu8(ch, es->cbuf, es->cursor); + es->cursor += len; + es->linelen += len - 1; + } refresh_line(1); state = VNORMAL; break; @@ -412,7 +424,6 @@ vi_hook(int ch) return 0; } else if (ch == edchars.werase) { struct edstate new_es, *save_es; - int i; int n = srchlen; new_es.cursor = n; @@ -430,10 +441,16 @@ vi_hook(int ch) refresh_line(0); return 0; } else { - if (srchlen == SRCHLEN - 1) + if (srchlen >= SRCHLEN - 1) vi_error(); else { - locpat[srchlen++] = ch; + len = x_vi_getu8(ch, u8c, 0); + for (i = 0; i < len; i++) { + locpat[srchlen++] = u8c[i]; + if (srchlen >= SRCHLEN - 1) + vi_error(); + } + if ((ch & 0x80) && Flag(FVISHOW8)) { if (es->linelen + 2 > es->cbufsize) vi_error(); @@ -449,7 +466,8 @@ vi_hook(int ch) } else { if (es->linelen >= es->cbufsize) vi_error(); - es->cbuf[es->linelen++] = ch; + for (i = 0; i < len; i++) + es->cbuf[es->linelen++] = u8c[i]; } es->cursor = es->linelen; refresh_line(0); @@ -537,7 +555,9 @@ vi_reset(char *buf, size_t len) static int nextstate(int ch) { - if (is_extend(ch)) + if (ch & 0x80) + return VFAIL; + else if (is_extend(ch)) return VEXTCMD; else if (is_srch(ch)) return VSEARCH; @@ -555,6 +575,9 @@ static int vi_insert(int ch) { int tcursor; + int i, len; + char u8c[4]; + if (ch == edchars.erase || ch == CTRL('h')) { if (insert == REPLACE) { @@ -674,17 +697,21 @@ vi_insert(int ch) /* End nonstandard vi commands } */ default: - if (es->linelen >= es->cbufsize - 1) + len = 
x_vi_getu8(ch, u8c, 0); + + if (es->linelen >= es->cbufsize - len) return -1; - ibuf[inslen++] = ch; + for (i = 0; i < len; i++) + ibuf[inslen++] = u8c[i]; if (insert == INSERT) { - memmove(&es->cbuf[es->cursor+1], &es->cbuf[es->cursor], + memmove(&es->cbuf[es->cursor+len], &es->cbuf[es->cursor], es->linelen - es->cursor); - es->linelen++; + es->linelen += len; } - es->cbuf[es->cursor++] = ch; + for (i = 0; i < len; i++) + es->cbuf[es->cursor++] = u8c[i]; if (insert == REPLACE && es->cursor > es->linelen) - es->linelen++; + es->linelen += len; expanded = NONE; } return 0; @@ -696,6 +723,8 @@ vi_cmd(int argcnt, const char *cmd) int ncursor; int cur, c1, c2, c3 = 0; int any; + int len; + char u8c[4]; struct edstate *t; if (argcnt == 0 && !is_zerocount(*cmd)) @@ -769,7 +798,7 @@ vi_cmd(int argcnt, const char *cmd) case 'a': modified = 1; hnum = hlast; if (es->linelen != 0) - while (isu8cont(es->cbuf[++es->cursor])) + while (isu8cont(es->cbuf[++es->cursor]) && es->cursor < es->linelen) /* fix cursor great linelen */ continue; insert = INSERT; break; @@ -835,11 +864,11 @@ vi_cmd(int argcnt, const char *cmd) case 'p': modified = 1; hnum = hlast; if (es->linelen != 0) - es->cursor++; + es->cursor += u8mblen(es->cbuf[es->cursor]); while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0) ; - if (es->cursor != 0) - es->cursor--; + while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor])) + ; if (argcnt != 0) return -1; break; @@ -849,8 +878,8 @@ vi_cmd(int argcnt, const char *cmd) any = 0; while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0) any = 1; - if (any && es->cursor != 0) - es->cursor--; + while (any && es->cursor != 0 && isu8cont(es->cbuf[--es->cursor])) + ; if (argcnt != 0) return -1; break; @@ -864,8 +893,8 @@ vi_cmd(int argcnt, const char *cmd) case 'D': yank_range(es->cursor, es->linelen); del_range(es->cursor, es->linelen); - if (es->cursor != 0) - es->cursor--; + while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor])) + ; break; case 'g': @@ -937,8 +966,9 
@@ vi_cmd(int argcnt, const char *cmd) return -1; del_range(es->cursor, cur); + len = x_vi_getu8(cmd[1], u8c, 0); while (argcnt-- > 0) - putbuf(&cmd[1], 1, 0); + putbuf(u8c, len, 0); while (es->cursor > 0) if (!isu8cont(es->cbuf[--es->cursor])) break; @@ -1090,7 +1120,7 @@ vi_cmd(int argcnt, const char *cmd) } modified = 1; hnum = hlast; if (es->cursor != es->linelen) - es->cursor++; + es->cursor += u8mblen(es->cbuf[es->cursor]); while (*p && !issp(*p)) { argcnt++; p++; @@ -1100,8 +1130,8 @@ vi_cmd(int argcnt, const char *cmd) else if (putbuf(sp, argcnt, 0) != 0) argcnt = -1; if (argcnt < 0) { - if (es->cursor != 0) - es->cursor--; + while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor])) + ; return -1; } insert = INSERT; @@ -1125,8 +1155,8 @@ vi_cmd(int argcnt, const char *cmd) modified = 1; hnum = hlast; *p = tolower(c); } - if (es->cursor < es->linelen - 1) - es->cursor++; + if (es->cursor < es->linelen - u8mblen(es->cbuf[es->cursor])) + es->cursor += u8mblen(es->cbuf[es->cursor]); } break; } @@ -1218,7 +1248,7 @@ domove(int argcnt, const char *cmd, int if ((ncursor = findch(fsavech, argcnt, t, i)) < 0) return -1; if (sub && t) - ncursor++; + ncursor += u8mblen(es->cbuf[ncursor]); break; case 'h': @@ -1277,16 +1307,16 @@ domove(int argcnt, const char *cmd, int ncursor = es->cursor; while (ncursor < es->linelen && (i = bracktype(es->cbuf[ncursor])) == 0) - ncursor++; + ncursor += u8mblen(es->cbuf[ncursor]); if (ncursor == es->linelen) return -1; bcount = 1; do { if (i > 0) { - if (++ncursor >= es->linelen) + if ((ncursor += u8mblen(es->cbuf[ncursor])) >= es->linelen) return -1; } else { - if (--ncursor < 0) + if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0) return -1; } t = bracktype(es->cbuf[ncursor]); @@ -1296,7 +1326,7 @@ domove(int argcnt, const char *cmd, int bcount--; } while (bcount != 0); if (sub && i > 0) - ncursor++; + ncursor += u8mblen(es->cbuf[ncursor]); break; default: @@ -1444,8 +1474,8 @@ edit_reset(char *buf, size_t len) pwidth -= 
prompt_trunc; } else prompt_trunc = 0; - if (!wbuf_len || wbuf_len != x_cols - 3) { - wbuf_len = x_cols - 3; + if (!wbuf_len || wbuf_len != (x_cols - 3) * 4) { /* The utf-8 has a maximum of 4 bytes per character. */ + wbuf_len = (x_cols - 3) * 4; wbuf[0] = aresize(wbuf[0], wbuf_len, APERM); wbuf[1] = aresize(wbuf[1], wbuf_len, APERM); } @@ -1457,6 +1487,70 @@ edit_reset(char *buf, size_t len) holdlen = 0; } +static int unget_char = -1; + +static void +x_vi_ungetc(int c) +{ + unget_char = c; +} + +static int +x_vi_getc(void) +{ + int c; + + if (unget_char != -1) { + c = unget_char; + unget_char = -1; + } else + c = x_getc(); + + return c; +} + +static int +x_vi_getu8(char firstch, char *buf, int off) +{ + int c, cc, i, j, len; + char u8c[4]; + + c = firstch; + + if (isu8cont(c)) + return 0; + i = 0; + u8c[i++] = c; + + if ((c & 0xf8) == 0xf0 && c < 0xf5) + len = 4; + else if ((c & 0xf0) == 0xe0) + len = 3; + else if ((c & 0xe0) == 0xc0 && c > 0xc1) + len = 2; + else + len = 1; + + for (j = len; j > 1; j--) { + cc = x_vi_getc(); + if (cc == -1) + return 0; + if (isu8cont(cc) == 0 || + (c == 0xe0 && j == 3 && cc < 0xa0) || + (c == 0xed && j == 3 && cc & 0x20) || + (c == 0xf4 && j == 4 && cc & 0x30)) { + x_vi_ungetc(cc); + return 0; + } + u8c[i++] = cc; + } + + for (i = 0; i < len; i++) + buf[off + i] = u8c[i]; + + return len; +} + /* * this is used for calling x_escape() in complete_word() */ @@ -1500,26 +1594,29 @@ static int findch(int ch, int cnt, int forw, int incl) { int ncursor; + char u8c[4]; + int len; if (es->linelen == 0) return -1; ncursor = es->cursor; + len = x_vi_getu8(ch, u8c, 0); while (cnt--) { do { if (forw) { - if (++ncursor == es->linelen) + if ((ncursor += u8mblen(es->cbuf[ncursor])) == es->linelen) return -1; } else { - if (--ncursor < 0) + if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0) return -1; } - } while (es->cbuf[ncursor] != ch); + } while (memcmp(es->cbuf + ncursor, u8c, len)); } if (!incl) { if (forw) - ncursor--; + while (ncursor > 0 
&& isu8cont(es->cbuf[--ncursor])); else - ncursor++; + ncursor += u8mblen(es->cbuf[ncursor]); } return ncursor; } @@ -1774,13 +1871,17 @@ static int outofwin(void) { int cur, col; + int wc; if (es->cursor < es->winleft) return 1; col = 0; cur = es->winleft; - while (cur < es->cursor) - col = newcol((unsigned char) es->cbuf[cur++], col); + while (cur < es->cursor) { + wc = u8code(&es->cbuf[cur]); + col = newcol(wc, col); + cur += u8mblen(es->cbuf[cur]); + } if (col >= winwidth) return 1; return 0; @@ -1792,6 +1893,7 @@ rewindow(void) int tcur, tcol; int holdcur1, holdcol1; int holdcur2, holdcol2; + int wc; holdcur1 = holdcur2 = tcur = 0; holdcol1 = holdcol2 = tcol = 0; @@ -1802,11 +1904,15 @@ rewindow(void) holdcur2 = tcur; holdcol2 = tcol; } - tcol = newcol((unsigned char) es->cbuf[tcur++], tcol); + wc = u8code(&es->cbuf[tcur]); + tcol = newcol(wc, tcol); + tcur += u8mblen(es->cbuf[tcur]); + } + while (tcol - holdcol1 > winwidth / 2) { + wc = u8code(&es->cbuf[holdcur1]); + holdcol1 = newcol(wc, holdcol1); + holdcur1 += u8mblen(es->cbuf[holdcur1]); } - while (tcol - holdcol1 > winwidth / 2) - holdcol1 = newcol((unsigned char) es->cbuf[holdcur1++], - holdcol1); es->winleft = holdcur1; } @@ -1814,11 +1920,7 @@ rewindow(void) static int newcol(int ch, int col) { - if (ch == '\t') - return (col | 7) + 1; - if (isu8cont(ch)) - return col; - return col + char_len(ch); + return col + u8width(ch); } /* Display wb1 assuming that wb2 is currently displayed. 
*/ @@ -1827,7 +1929,6 @@ display(char *wb1, char *wb2, int leftsi { char *twb1; /* pointer into the buffer to display */ char *twb2; /* pointer into the previous display buffer */ - static int lastb = -1; /* last byte# written from wb1, if UTF-8 */ int cur; /* byte# in the main command line buffer */ int col; /* display column loop variable */ int ncol; /* display column of the cursor */ @@ -1835,6 +1936,9 @@ display(char *wb1, char *wb2, int leftsi int moreright; char mc; /* new "more character" at the right of window */ unsigned char ch; + int wc1, wc2, w1col, w2col; + int i, len, wc, u8w; + /* * Fill the current display buffer with data from cbuf. @@ -1846,6 +1950,7 @@ display(char *wb1, char *wb2, int leftsi moreright = 0; twb1 = wb1; while (col < winwidth && cur < es->linelen) { + u8w = 1; if (cur == es->cursor && leftside) ncol = col + pwidth; if ((ch = es->cbuf[cur]) == '\t') { @@ -1869,15 +1974,21 @@ display(char *wb1, char *wb2, int leftsi col++; } } else { - *twb1++ = ch; - if (!isu8cont(ch)) - col++; + len = u8mblen(ch); + for (i = 0; i < len; i++) + *twb1++ = es->cbuf[cur + i]; + if (len > 1) + wc = u8code(&es->cbuf[cur]); + else + wc = ch; + u8w = u8width(wc); + col += u8w; } } } if (cur == es->cursor && !leftside) - ncol = col + pwidth - 1; - cur++; + ncol = col + pwidth - u8w; + cur += u8mblen(ch); } if (cur == es->cursor) ncol = col + pwidth; @@ -1900,24 +2011,13 @@ display(char *wb1, char *wb2, int leftsi col = pwidth; cnt = winwidth; - for (twb1 = wb1, twb2 = wb2; cnt; twb1++, twb2++) { - if (*twb1 != *twb2) { - - /* - * When a byte changes in the middle of a UTF-8 - * character, back up to the start byte, unless - * the previous byte was the last one written. 
- */ - - if (col > 0 && isu8cont(*twb1)) { - col--; - if (lastb >= 0 && twb1 == wb1 + lastb + 1) - cur_col = col; - else while (twb1 > wb1 && isu8cont(*twb1)) { - twb1--; - twb2--; - } - } + w1col = 0; + w2col = 0; + for (twb1 = wb1, twb2 = wb2; cnt > 0;) { + wc1 = u8code(twb1); + wc2 = u8code(twb2); + u8w = u8width(wc1); + if (wc1 != wc2 || w1col != w2col) { if (cur_col != col) ed_mov_opt(col, wb1); @@ -1926,16 +2026,11 @@ display(char *wb1, char *wb2, int leftsi * Always write complete characters, and * advance all pointers accordingly. */ - - x_putc(*twb1); - while (isu8cont(twb1[1])) { - x_putc(*++twb1); - twb2++; - } - lastb = *twb1 & 0x80 ? twb1 - wb1 : -1; - cur_col++; - } else if (isu8cont(*twb1)) - continue; + cur_col += u8w; + len = u8mblen(*twb1); + for (i = 0; i < len; i++) + x_putc(twb1[i]); + } /* * For changed continuation bytes, we backed up. @@ -1943,8 +2038,16 @@ display(char *wb1, char *wb2, int leftsi * So, getting here, we had a real column. */ - col++; - cnt--; + col += u8w; + cnt -= u8w; + w1col += u8w; + + twb1 += u8mblen(*twb1); + while (w1col >= w2col + u8width(wc2)) { + w2col += u8width(wc2); + twb2 += u8mblen(*twb2); + wc2 = u8code(twb2); + } } /* Update the "more character". */ @@ -1965,15 +2068,12 @@ display(char *wb1, char *wb2, int leftsi x_putc(mc); cur_col++; morec = mc; - lastb = -1; } /* Move the cursor to its new position. */ - if (cur_col != ncol) { + if (cur_col != ncol) ed_mov_opt(ncol, wb1); - lastb = -1; - } } /* Move the display cursor to display column number col. */ @@ -1981,6 +2081,7 @@ static void ed_mov_opt(int col, char *wb) { int ci; + int wc; /* The cursor is already at the right place. */ @@ -2006,10 +2107,14 @@ ed_mov_opt(int col, char *wb) /* Advance the cursor. 
*/ - for (ci = pwidth; ci < col || isu8cont(*wb); - ci = newcol((unsigned char)*wb++, ci)) + for (ci = pwidth; ci < col || isu8cont(*wb); wb++) { if (ci > cur_col || (ci == cur_col && !isu8cont(*wb))) x_putc(*wb); + if (!isu8cont(*wb)) { + wc = u8code(wb); + ci = newcol(wc, ci); + } + } cur_col = ci; } @@ -2257,5 +2362,70 @@ static int isu8cont(unsigned char c) { return !Flag(FVISHOW8) && (c & (0x80 | 0x40)) == 0x80; +} + +static int +u8code(const char *str) +{ + int wc, i, len; + unsigned char c; + + wc = 0; + c = *str++; + + if (Flag(FVISHOW8)) + return c; + + if ((c & 0xf8) == 0xf0 && c < 0xf5) { + wc = c & 0x07; + len = 4; + } else if ((c & 0xf0) == 0xe0) { + wc = c & 0x0f; + len = 3; + } else if ((c & 0xe0) == 0xc0 && c > 0xc1) { + wc = c & 0x1f; + len = 2; + } else { + len = 1; + wc = c & 0xff; + } + + for (i = 1; i < len; i++) { + c = *str++; + wc = (wc << 6) | (c & 0x3f); + } + + return wc; +} + +static int +u8mblen(unsigned char c) +{ + int len; + + if (Flag(FVISHOW8)) + return 1; + + if ((c & 0xf8) == 0xf0 && c < 0xf5) + len = 4; + else if ((c & 0xf0) == 0xe0) + len = 3; + else if ((c & 0xe0) == 0xc0 && c > 0xc1) + len = 2; + else + len = 1; + + return len; +} + +static int +u8width(int wc) +{ + int w; + + w = wcwidth(wc); + if (w == -1) + return 1; + return w; } #endif /* VI */