Comments inline. I think this still needs a lot of work...

I've done one pass over this to weed out some obvious problems.
There might be many I've missed.

Not going to do any run-time testing because emacs/mg break my fingers ;)

On Mon, May 28, 2018 at 09:32:10PM +0300, Leonid Bobrov wrote:
> Index: basic.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/basic.c,v
> retrieving revision 1.47
> diff -u -p -u -p -r1.47 basic.c
> --- basic.c   10 Oct 2015 09:13:14 -0000      1.47
> +++ basic.c   28 May 2018 18:08:10 -0000
> @@ -18,6 +18,7 @@
>  #include <signal.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> +#include <wchar.h>
>  
>  #include "def.h"
>  
> @@ -269,12 +270,25 @@ setgoal(void)
>  int
>  getgoal(struct line *dlp)
>  {
> -     int c, i, col = 0;
> -     char tmp[5];
> +     return getbyteofcol(dlp, 0, curgoal);
> +}
>  
> +/*
> + * Return the byte offset within lp that is targetcol columns beyond
> + * startbyte
> + */
> +size_t
> +getbyteofcol(const struct line *lp, const size_t startbyte,
> +             const size_t targetcol)
> +{
> +     int c;
> +     size_t i, col = 0;
> +     char tmp[5];
> +     size_t advance_by = 1;
>  
> -     for (i = 0; i < llength(dlp); i++) {
> -             c = lgetc(dlp, i);
> +     for (i = startbyte; i < llength(lp); i += advance_by) {
> +             advance_by = 1;
> +             c = lgetc(lp, i);
>               if (c == '\t'
>  #ifdef       NOTAB
>                   && !(curbp->b_flag & BFNOTAB)
> @@ -284,16 +298,84 @@ getgoal(struct line *dlp)
>                       col++;
>               } else if (ISCTRL(c) != FALSE) {
>                       col += 2;
> -             } else if (isprint(c))
> +             } else if (isprint(c)) {
>                       col++;
> -             else {
> +             } else if (!(curbp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumed = mbrtowc(&wc, &lp->l_text[i],
> +                                               llength(lp) - i, &mbs);
> +                     int width = -1;
> +                     if (consumed < (size_t) -2) {

I think this should say: consumed > 0

Since both (size_t)-1 and (size_t)-2 indicate special error conditions
they should only be compared the with == operator, in my opinion.

Please read the mbrtowc man page if you haven't already.
It covers error conditions extensively.

A good idiom would be something like:

        size_t ret = mbrtowc(...)
        if (ret == (size_t)-1) {
                /* handle decoding error */
        } else if (ret == (size_t)-2) {
                /* handle incomplete sequence */
        } else if (ret == 0) {
                /* handle end of line */
        } else {
                /* handle character which was parsed */
        }

> +                             width = wcwidth(wc);
> +                     }
> +                     if (width >= 0) {

You should explicitly deal with width == -1 here as well.
A negative width means this line contains a non-printable character.
But that doesn't mean this character won't have any effect when printed.
You might want to narrow down the truth about such a character
with iswcntrl() and friends and take some appropriate action.

> +                             col += width;
> +                             advance_by = consumed;

What if consumed == (size_t)-1 or (size_t)-2 here?

In particular, you need to decide what to do with illegal byte sequences.
An editor should probably display them in some escaped form and continue
processing valid text beyond. UTF-8 is self-synchronizing so this should
not be too difficult. Printing such bytes in an escaped form might
require more columns than a valid byte sequence would.

Can it happen at any step during editing with mg that a valid multi-byte
character gets split by a newline? That would cause (size_t)-2 here since
this function is processing only one line. An editor should avoid introducing
this problem, but should be able to cope with files which already have this
problem when they are opened.
A good design would process a file such that any existing invalid multi-byte
characters will at least remain as they were when the file was opened, or
at best can be repaired by the user (do not get tempted to try to magically
"fix" the data).

> +                     } else {
> +                             col += snprintf(tmp, sizeof(tmp), "\\%o", c);

What if snprintf returns -1 here?

> +                     }
> +             } else {
>                       col += snprintf(tmp, sizeof(tmp), "\\%o", c);

What if snprintf returns -1 here?

>               }
> -             if (col > curgoal)
> +             if (col > targetcol)
>                       break;
>       }
>       return (i);
>  }
> +
> +/*
> + * Return the column at which specified offset byte would appear, if
> + * this were part of a longer string printed by vtputs, starting at
> + * intial_col
> + */
> +size_t
> +getcolofbyte(const struct line *lp, const size_t startbyte,
> +             const size_t initial_col, const size_t targetoffset)
> +{
> +     int c;
> +     size_t i, col = initial_col;
> +     char tmp[5];
> +     size_t advance_by = 1;
> +
> +     for (i = startbyte; i < llength(lp); i += advance_by) {
> +             if (i >= targetoffset)
> +                     break;
> +             advance_by = 1;
> +             c = lgetc(lp, i);
> +             if (c == '\t'
> +#ifdef       NOTAB
> +                 && !(curbp->b_flag & BFNOTAB)
> +#endif
> +                     ) {
> +                     col |= 0x07;
> +                     col++;
> +             } else if (ISCTRL(c) != FALSE) {
> +                     col += 2;
> +             } else if (isprint(c)) {
> +                     col++;
> +             } else if (!(curbp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumed = mbrtowc(&wc, &lp->l_text[i],
> +                                               llength(lp) - i, &mbs);
> +                     int width = -1;
> +                     if (consumed < (size_t) -2) {

Same concern as above: consumed > 0

> +                             width = wcwidth(wc);
> +                     }
> +                     if (width >= 0) {
> +                             col += width;
> +     while (cp1 != vvp->v_text && !isprint(*cp1) &&
> +            mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {
        > +                             advance_by = consumed;

Agaile (cp1 != vvp->v_text && !isprint(*cp1) &&
> +            mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {
> +     while (cp1 != vvp->v_text && !isprint(*cp1) &&
> +            mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {
n, consumed could represent a -1 or -2 error from mbrtowc() at this point.

> +                     } else {
> +                             col += snprintf(tmp, sizeof(tmp), "\\%o", c);

Again, snprintf can return -1.

> +                     }
> +             } else {
> +                     col += snprintf(tmp, sizeof(tmp), "\\%o", c);

Again, snprintf can return -1.

> +             }
> +     }
> +     return (col);
> +}
> +
>  
>  /*
>   * Scroll forward by a specified number
> Index: cmode.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/cmode.c,v
> retrieving revision 1.16
> diff -u -p -u -p -r1.16 cmode.c
> --- cmode.c   26 Sep 2015 21:51:58 -0000      1.16
> +++ cmode.c   28 May 2018 18:08:10 -0000
> @@ -14,6 +14,7 @@
>  #include <ctype.h>
>  #include <signal.h>
>  #include <stdio.h>
> +#include <wchar.h>
>  
>  #include "def.h"
>  #include "funmap.h"
> @@ -419,10 +420,25 @@ findcolpos(const struct buffer *bp, cons
>                       ) {
>                       col |= 0x07;
>                       col++;
> -             } else if (ISCTRL(c) != FALSE)
> +             } else if (ISCTRL(c) != FALSE) {
>                       col += 2;
> -             else if (isprint(c)) {
> +             } else if (isprint(c)) {
>                       col++;
> +             } else if (!(bp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumed = mbrtowc(&wc, &lp->l_text[i],
> +                                               llength(lp) - i, &mbs);
> +                     int width = -1;
> +                     if (consumed < (size_t) -2) {

Same.

> +                             width = wcwidth(wc);
> +                     }
> +                     if (width >= 0) {
> +                             col += width;
> +                             i += (consumed - 1);
> +                     } else {
> +                             col += snprintf(tmp, sizeof(tmp), "\\%o", c);

Same.

> +                     }
>               } else {
>                       col += snprintf(tmp, sizeof(tmp), "\\%o", c);

Same.

This pattern has now occured 3 times in this diff.
Consider adding a function to avoid code duplication so that
bugs can be fixed in one place.

>               }
> Index: def.h
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/def.h,v
> retrieving revision 1.155
> diff -u -p -u -p -r1.155 def.h
> --- def.h     14 Apr 2016 17:05:32 -0000      1.155
> +++ def.h     28 May 2018 18:08:10 -0000
> @@ -263,7 +263,7 @@ struct buffer {
>       int              b_marko;       /* ditto for the "mark"          */
>       short            b_nmodes;      /* number of non-fundamental modes */
>       char             b_nwnd;        /* Count of windows on buffer    */
> -     char             b_flag;        /* Flags                         */
> +     short            b_flag;        /* Flags                         */
>       char             b_fname[NFILEN]; /* File name                   */
>       char             b_cwd[NFILEN]; /* working directory             */
>       struct fileinfo  b_fi;          /* File attributes               */
> @@ -290,6 +290,7 @@ struct buffer {
>  #define BFDIRTY     0x20             /* Buffer was modified elsewhere */
>  #define BFIGNDIRTY  0x40             /* Ignore modifications          */
>  #define BFDIREDDEL  0x80             /* Dired has a deleted 'D' file  */
> +#define BFSHOWRAW   0x100            /* Show unprintable as octal     */
>  /*
>   * This structure holds information about recent actions for the Undo 
> command.
>   */
> @@ -490,6 +491,7 @@ int                digit_argument(int, int);
>  int           negative_argument(int, int);
>  int           selfinsert(int, int);
>  int           quote(int, int);
> +int           insert_char(int, int);
>  
>  /* main.c */
>  int           ctrlg(int, int);
> @@ -512,6 +514,8 @@ int                forwline(int, int);
>  int           backline(int, int);
>  void          setgoal(void);
>  int           getgoal(struct line *);
> +size_t                getbyteofcol(const struct line *, size_t, size_t);
> +size_t                getcolofbyte(const struct line *, size_t, size_t, 
> size_t);
>  int           forwpage(int, int);
>  int           backpage(int, int);
>  int           forw1page(int, int);
> @@ -658,6 +662,7 @@ int                notabmode(int, int);
>  #endif       /* NOTAB */
>  int           overwrite_mode(int, int);
>  int           set_default_mode(int,int);
> +int           show_raw_mode(int, int);
>  
>  #ifdef REGEX
>  /* re_search.c X */
> Index: display.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/display.c,v
> retrieving revision 1.48
> diff -u -p -u -p -r1.48 display.c
> --- display.c 6 Jul 2017 19:27:37 -0000       1.48
> +++ display.c 28 May 2018 18:08:10 -0000
> @@ -7,7 +7,7 @@
>   * redisplay system knows almost nothing about the editing
>   * process; the editing functions do, however, set some
>   * hints to eliminate a lot of the grinding. There is more
> - * that can be done; the "vtputc" interface is a real
> + * that can be done; the "vtputs" interface is a real
>   * pig.
>   */
>  
> @@ -18,6 +18,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <term.h>
> +#include <wchar.h>
>  
>  #include "def.h"
>  #include "kbd.h"
> @@ -52,11 +53,10 @@ struct score {
>  };
>  
>  void vtmove(int, int);
> -void vtputc(int);
>  void vtpute(int);
> -int  vtputs(const char *);
> +int  vtputs(const char *, size_t, size_t);
>  void vteeol(void);
> -void updext(int, int);
> +int  updext(int, int);
>  void modeline(struct mgwin *, int);
>  void setscores(int, int);
>  void traceback(int, int, int, int);
> @@ -216,8 +216,8 @@ vtresize(int force, int newrow, int newc
>       }
>       if (rowchanged || colchanged || first_run) {
>               for (i = 0; i < 2 * (newrow - 1); i++)
> -                     TRYREALLOC(video[i].v_text, newcol);
> -             TRYREALLOC(blanks.v_text, newcol);
> +                     TRYREALLOC(video[i].v_text, newcol * MB_CUR_MAX);
> +             TRYREALLOC(blanks.v_text, newcol * MB_CUR_MAX);

I am not quite sure why the blanks array needs to be grown.
Doesn't the array size relate to the screen size, and doesn't
each ' ' in the array occupy an entire column on the screen?

Then again, the code involved in updating the display is too hairy
for me to digest right now, so I might be missing something.

>       }
>  
>       nrow = newrow;
> @@ -260,7 +260,7 @@ vtinit(void)
>        */
>  
>       blanks.v_color = CTEXT;
> -     for (i = 0; i < ncol; ++i)
> +     for (i = 0; i < ncol * MB_CUR_MAX; ++i)
>               blanks.v_text[i] = ' ';
>  }
>  
> @@ -287,7 +287,7 @@ vttidy(void)
>   * Move the virtual cursor to an origin
>   * 0 spot on the virtual display screen. I could
>   * store the column as a character pointer to the spot
> - * on the line, which would make "vtputc" a little bit
> + * on the line, which would make "vtputs" a little bit
>   * more efficient. No checking for errors.
>   */
>  void
> @@ -298,88 +298,6 @@ vtmove(int row, int col)
>  }
>  
>  /*
> - * Write a character to the virtual display,
> - * dealing with long lines and the display of unprintable
> - * things like control characters. Also expand tabs every 8
> - * columns. This code only puts printing characters into
> - * the virtual display image. Special care must be taken when
> - * expanding tabs. On a screen whose width is not a multiple
> - * of 8, it is possible for the virtual cursor to hit the
> - * right margin before the next tab stop is reached. This
> - * makes the tab code loop if you are not careful.
> - * Three guesses how we found this.
> - */
> -void
> -vtputc(int c)
> -{
> -     struct video    *vp;
> -
> -     c &= 0xff;
> -
> -     vp = vscreen[vtrow];
> -     if (vtcol >= ncol)
> -             vp->v_text[ncol - 1] = '$';
> -     else if (c == '\t'
> -#ifdef       NOTAB
> -         && !(curbp->b_flag & BFNOTAB)
> -#endif
> -         ) {
> -             do {
> -                     vtputc(' ');
> -             } while (vtcol < ncol && (vtcol & 0x07) != 0);
> -     } else if (ISCTRL(c)) {
> -             vtputc('^');
> -             vtputc(CCHR(c));
> -     } else if (isprint(c))
> -             vp->v_text[vtcol++] = c;
> -     else {
> -             char bf[5];
> -
> -             snprintf(bf, sizeof(bf), "\\%o", c);
> -             vtputs(bf);
> -     }
> -}
> -
> -/*
> - * Put a character to the virtual screen in an extended line.  If we are not
> - * yet on left edge, don't print it yet.  Check for overflow on the right
> - * margin.
> - */
> -void
> -vtpute(int c)
> -{
> -     struct video *vp;
> -
> -     c &= 0xff;
> -
> -     vp = vscreen[vtrow];
> -     if (vtcol >= ncol)
> -             vp->v_text[ncol - 1] = '$';
> -     else if (c == '\t'
> -#ifdef       NOTAB
> -         && !(curbp->b_flag & BFNOTAB)
> -#endif
> -         ) {
> -             do {
> -                     vtpute(' ');
> -             } while (((vtcol + lbound) & 0x07) != 0 && vtcol < ncol);
> -     } else if (ISCTRL(c) != FALSE) {
> -             vtpute('^');
> -             vtpute(CCHR(c));
> -     } else if (isprint(c)) {
> -             if (vtcol >= 0)
> -                     vp->v_text[vtcol] = c;
> -             ++vtcol;
> -     } else {
> -             char bf[5], *cp;
> -
> -             snprintf(bf, sizeof(bf), "\\%o", c);
> -             for (cp = bf; *cp != '\0'; cp++)
> -                     vtpute(*cp);
> -     }
> -}
> -
> -/*
>   * Erase from the end of the software cursor to the end of the line on which
>   * the software cursor is located. The display routines will decide if a
>   * hardware erase to end of line command should be used to display this.
> @@ -390,7 +308,7 @@ vteeol(void)
>       struct video *vp;
>  
>       vp = vscreen[vtrow];
> -     while (vtcol < ncol)
> +     while (vtcol < ncol * MB_CUR_MAX)
>               vp->v_text[vtcol++] = ' ';
>  }
>  
> @@ -410,7 +328,7 @@ update(int modelinecolor)
>       struct mgwin    *wp;
>       struct video    *vp1;
>       struct video    *vp2;
> -     int      c, i, j;
> +     int      c, i;
>       int      hflag;
>       int      currow, curcol;
>       int      offs, size;
> @@ -485,8 +403,9 @@ update(int modelinecolor)
>                       vscreen[i]->v_color = CTEXT;
>                       vscreen[i]->v_flag |= (VFCHG | VFHBAD);
>                       vtmove(i, 0);
> -                     for (j = 0; j < llength(lp); ++j)
> -                             vtputc(lgetc(lp, j));
> +                     if (llength(lp)) {
> +                             vtputs(lp->l_text, llength(lp), 0);
> +                     }
>                       vteeol();
>               } else if ((wp->w_rflag & (WFEDIT | WFFULL)) != 0) {
>                       hflag = TRUE;
> @@ -495,8 +414,10 @@ update(int modelinecolor)
>                               vscreen[i]->v_flag |= (VFCHG | VFHBAD);
>                               vtmove(i, 0);
>                               if (lp != wp->w_bufp->b_headp) {
> -                                     for (j = 0; j < llength(lp); ++j)
> -                                             vtputc(lgetc(lp, j));
> +                                     if (llength(lp)) {
> +                                             vtputs(lp->l_text, llength(lp),
> +                                                 0);
> +                                     }
>                                       lp = lforw(lp);
>                               }
>                               vteeol();
> @@ -514,32 +435,53 @@ update(int modelinecolor)
>               ++currow;
>               lp = lforw(lp);
>       }
> +
>       curcol = 0;
>       i = 0;
>       while (i < curwp->w_doto) {
> -             c = lgetc(lp, i++);
> +             char tmp[5];
> +             c = lgetc(lp, i);
>               if (c == '\t'
>  #ifdef       NOTAB
>                   && !(curbp->b_flag & BFNOTAB)
>  #endif
>                       ) {
> -                     curcol |= 0x07;
>                       curcol++;
> +                     while ((curcol - lbound) & 0x07) {
> +                             curcol++;
> +                     }
>               } else if (ISCTRL(c) != FALSE)
>                       curcol += 2;
> -             else if (isprint(c))
> +             else if (isprint(c)) {
>                       curcol++;
> -             else {
> -                     char bf[5];
> -
> -                     snprintf(bf, sizeof(bf), "\\%o", c);
> -                     curcol += strlen(bf);
> +             } else if (!(curbp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumed = mbrtowc(&wc, &lp->l_text[i],
> +                                               llength(lp) - i, &mbs);
> +                     int width = -1;
> +                     if (consumed < (size_t) -2) {

I feel like I'm repeating myself :)

> +                             width = wcwidth(wc);
> +                     } else {
> +                             memset(&mbs, 0, sizeof mbs);
> +                     }
> +                     if (width >= 0) {
> +                             curcol += width;
> +                             i += (consumed - 1);
> +                     } else {
> +                             snprintf(tmp, sizeof(tmp), "\\%o", c);

Missing check for error return (< 0) from snprintf().

> +                             curcol += strlen(tmp);

You should check for truncation in snprintf: If it returns >= sizeof(tmp)
the string was truncated which you should treat as an error condition
because would mean your chosen tmp buffer size is too small.

And if there was no truncation, you can use snprintf's return value
directly instead of calling strlen() to calculate the same number again.

> +                     }
> +             } else {
> +                     snprintf(tmp, sizeof(tmp), "\\%o", c);

Same.

> +                     curcol += strlen(tmp);

Same.

>               }
> +             i++;
>       }
>       if (curcol >= ncol - 1) {       /* extended line. */
>               /* flag we are extended and changed */
>               vscreen[currow]->v_flag |= VFEXT | VFCHG;
> -             updext(currow, curcol); /* and output extended line */
> +             curcol = updext(currow, curcol);
>       } else
>               lbound = 0;     /* not extended line */
>  
> @@ -558,8 +500,10 @@ update(int modelinecolor)
>                               if ((wp != curwp) || (lp != wp->w_dotp) ||
>                                   (curcol < ncol - 1)) {
>                                       vtmove(i, 0);
> -                                     for (j = 0; j < llength(lp); ++j)
> -                                             vtputc(lgetc(lp, j));
> +                                     if (llength(lp)) {
> +                                             vtputs(lp->l_text, llength(lp),
> +                                                 0);
> +                                     }
>                                       vteeol();
>                                       /* this line no longer is extended */
>                                       vscreen[i]->v_flag &= ~VFEXT;
> @@ -661,39 +605,44 @@ ucopy(struct video *vvp, struct video *p
>       pvp->v_hash = vvp->v_hash;
>       pvp->v_cost = vvp->v_cost;
>       pvp->v_color = vvp->v_color;
> -     bcopy(vvp->v_text, pvp->v_text, ncol);
> +     bcopy(vvp->v_text, pvp->v_text, ncol * MB_CUR_MAX);
>  }
>  
>  /*
> - * updext: update the extended line which the cursor is currently on at a
> - * column greater than the terminal width. The line will be scrolled right or
> - * left to let the user see where the cursor is.
> + * updext: update the extended line which the cursor is currently on
> + * at a column greater than the terminal width. The line will be
> + * scrolled right or left to let the user see where the cursor
> + * is. curcol may need to be adjusted, depending on how wide
> + * characters and lbound interact, that adjusted position is returned.
>   */
> -void
> +int
>  updext(int currow, int curcol)
>  {
> -     struct line     *lp;                    /* pointer to current line */
> -     int      j;                     /* index into line */
> +     struct line *lp = curwp->w_dotp;        /* pointer to current line */
> +     size_t startbyte;
> +     int bettercol = curcol;
> +     size_t fullextent;
>  
>       if (ncol < 2)
> -             return;
> +             return curcol;
>  
>       /*
> -      * calculate what column the left bound should be
> -      * (force cursor into middle half of screen)
> +      * calculate what column the left bound should be (force
> +      * cursor into middle half of screen). Ensuring that it is at
> +      * a tabstop allows update() to calculate curcol without
> +      * wondering how tabstops are calculated before the first '$'.
>        */
>       lbound = curcol - (curcol % (ncol >> 1)) - (ncol >> 2);
> +     lbound = (lbound | 0x07) + 1;
> +     vscreen[currow]->v_text[0] = '$';
> +     vtmove(currow, 1);
> +     startbyte = getbyteofcol(lp, 0, lbound + 1);
> +     fullextent = getbyteofcol(lp, startbyte, ncol + 1);

This might be a good place for some error checking and falling back
to displaying the line with invalid characeters in an escaped form
if necessary.

> +     vtputs(lp->l_text + startbyte, fullextent - startbyte, 1);
> +     vteeol();
>  
> -     /*
> -      * scan through the line outputing characters to the virtual screen
> -      * once we reach the left edge
> -      */
> -     vtmove(currow, -lbound);                /* start scanning offscreen */
> -     lp = curwp->w_dotp;                     /* line to output */
> -     for (j = 0; j < llength(lp); ++j)       /* until the end-of-line */
> -             vtpute(lgetc(lp, j));
> -     vteeol();                               /* truncate the virtual line */
> -     vscreen[currow]->v_text[0] = '$';       /* and put a '$' in column 1 */
> +     bettercol = lbound + getcolofbyte(lp, startbyte, 1, curwp->w_doto);
> +     return (bettercol);
>  }
>  
>  /*
> @@ -708,12 +657,35 @@ updext(int currow, int curcol)
>  void
>  uline(int row, struct video *vvp, struct video *pvp)
>  {
> -     char  *cp1;
> -     char  *cp2;
> -     char  *cp3;
> -     char  *cp4;
> -     char  *cp5;
> +     char  *cp1; /* Pointer to the start of dirty region */
> +     char  *cp2; /* pvp's counterpart for cp1 */
> +     char  *cp3; /* Pointer to end of dirty region */
> +     char  *cp4; /* pvp's counterpart for cp3 */
> +     char  *cp5; /* After this, within dirty region, all is ' ' */
> +     char  *mbcounter;
>       int    nbflag;
> +     int    startcol; /* onscreen column matching cp1 */
> +     char  *lastbyte; /* byte which handles last onscreen column  */
> +     int    seencols = 0;
> +     mbstate_t mbs = { 0 };
> +     wchar_t wc = 0;
> +
> +     lastbyte = vvp->v_text;
> +     while (seencols < ncol && *lastbyte) {
> +             size_t consumed = mbrtowc(&wc, lastbyte,
> +                 (vvp->v_text + ncol * MB_CUR_MAX - lastbyte), &mbs);
> +             if (consumed < (size_t) -2) {

Again, the error checking as it is done here isn't idiomatic.
See above.

> +                     lastbyte += consumed;
> +                     seencols += wcwidth(wc);

wcwidth() could return -1 here.

> +             } else {
> +                     lastbyte++;
> +                     seencols++;
> +                     memset(&mbs, 0, sizeof mbs);
> +             }
> +     }
> +     if (lastbyte - vvp->v_text < ncol) {
> +             lastbyte = &vvp->v_text[ncol];
> +     }
>  
>       if (vvp->v_color != pvp->v_color) {     /* Wrong color, do a     */
>               ttmove(row, 0);                 /* full redraw.          */
> @@ -729,11 +701,12 @@ uline(int row, struct video *vvp, struct
>                * putting the invisible glitch character on the next line.
>                * (Hazeltine executive 80 model 30)
>                */
> -             cp2 = &vvp->v_text[ncol - (magic_cookie_glitch >= 0 ?
> -                 (magic_cookie_glitch != 0 ? magic_cookie_glitch : 1) : 0)];
> +             cp2 = lastbyte -
> +                 (magic_cookie_glitch >= 0 ? (magic_cookie_glitch != 0 ?
> +                 magic_cookie_glitch : 1) : 0);
>  #else
>               cp1 = &vvp->v_text[0];
> -             cp2 = &vvp->v_text[ncol];
> +             cp2 = lastbyte;
>  #endif
>               while (cp1 != cp2) {
>                       ttputc(*cp1++);
> @@ -744,21 +717,31 @@ uline(int row, struct video *vvp, struct
>       }
>       cp1 = &vvp->v_text[0];          /* Compute left match.   */
>       cp2 = &pvp->v_text[0];
> -     while (cp1 != &vvp->v_text[ncol] && cp1[0] == cp2[0]) {
> +     while (cp1 != lastbyte && cp1[0] == cp2[0]) {
>               ++cp1;
>               ++cp2;
>       }
> -     if (cp1 == &vvp->v_text[ncol])  /* All equal.            */
> +     if (cp1 == lastbyte)    /* All equal.            */
>               return;
> +     while (cp1 != vvp->v_text && !isprint(*cp1) &&
> +            mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {

NEVER call mbrtowc in a loop header like that.
Call it inside the loop body and handle errors.

> +             --cp1;
> +             --cp2;
> +     }
>       nbflag = FALSE;
> -     cp3 = &vvp->v_text[ncol];       /* Compute right match.  */
> -     cp4 = &pvp->v_text[ncol];
> +     cp3 = lastbyte; /* Compute right match.  */
> +     cp4 = &pvp->v_text[lastbyte - vvp->v_text];
>       while (cp3[-1] == cp4[-1]) {
>               --cp3;
>               --cp4;
>               if (cp3[0] != ' ')      /* Note non-blanks in    */
>                       nbflag = TRUE;  /* the right match.      */
>       }
> +     while (cp3 != lastbyte && !isprint(*cp3) &&
> +             mbrtowc(&wc, cp3, (lastbyte - cp3), &mbs) >= (size_t) -2) {

Same.

> +             ++cp3;
> +             ++cp4;
> +     }
>       cp5 = cp3;                      /* Is erase good?        */
>       if (nbflag == FALSE && vvp->v_color == CTEXT) {
>               while (cp5 != cp1 && cp5[-1] == ' ')
> @@ -768,13 +751,27 @@ uline(int row, struct video *vvp, struct
>                       cp5 = cp3;
>       }
>       /* Alcyon hack */
> -     ttmove(row, (int) (cp1 - &vvp->v_text[0]));
> +     startcol = 0;
> +     mbcounter = vvp->v_text;
> +     while ((cp1 - mbcounter) > 0) {
> +             size_t consumed = mbrtowc(&wc, mbcounter, (cp1 - mbcounter),
> +                                       &mbs);
> +             if (consumed < (size_t) -2) {

Again, please use a better idiom.

> +                     mbcounter += consumed;
> +                     startcol += wcwidth(wc);

wcwidth() could return -1.

> +             } else {
> +                     mbcounter++;
> +                     startcol++;
> +                     memset(&mbs, 0, sizeof mbs);
> +             }
> +     }
> +     ttmove(row, startcol);
>  #ifdef       STANDOUT_GLITCH
>       if (vvp->v_color != CTEXT && magic_cookie_glitch > 0) {
>               if (cp1 < &vvp->v_text[magic_cookie_glitch])
>                       cp1 = &vvp->v_text[magic_cookie_glitch];
> -             if (cp5 > &vvp->v_text[ncol - magic_cookie_glitch])
> -                     cp5 = &vvp->v_text[ncol - magic_cookie_glitch];
> +             if (cp5 > lastbyte - magic_cookie_glitch)
> +                     cp5 = lastbyte - magic_cookie_glitch;
>       } else if (magic_cookie_glitch < 0)
>  #endif
>               ttcolor(vvp->v_color);
> @@ -807,46 +804,39 @@ modeline(struct mgwin *wp, int modelinec
>       vscreen[n]->v_flag |= (VFCHG | VFHBAD); /* Recompute, display.   */
>       vtmove(n, 0);                           /* Seek to right line.   */
>       bp = wp->w_bufp;
> -     vtputc('-');
> -     vtputc('-');
> +     n = vtputs("--", 0, 0);
>       if ((bp->b_flag & BFREADONLY) != 0) {
> -             vtputc('%');
> +             n += vtputs("%", 0, n);
>               if ((bp->b_flag & BFCHG) != 0)
> -                     vtputc('*');
> +                     n += vtputs("*", 0, n);
>               else
> -                     vtputc('%');
> +                     n += vtputs("%", 0, n);
>       } else if ((bp->b_flag & BFCHG) != 0) { /* "*" if changed.       */
> -             vtputc('*');
> -             vtputc('*');
> +             n += vtputs("**", 0, n);
>       } else {
> -             vtputc('-');
> -             vtputc('-');
> +             n += vtputs("--", 0, n);
>       }
> -     vtputc('-');
> +     n += vtputs("-", 0, n);
>       n = 5;

n is being reset to 5 here, overwriting what vtputs() returned.
That looks wrong.

> -     n += vtputs("Mg: ");
> +     n += vtputs("Mg: ", 0, n);
>       if (bp->b_bname[0] != '\0')
> -             n += vtputs(&(bp->b_bname[0]));
> +             n += vtputs(&(bp->b_bname[0]), 0, n);
>       while (n < 42) {                        /* Pad out with blanks.  */
> -             vtputc(' ');
> -             ++n;
> +             n += vtputs(" ", 0, n);
>       }
> -     vtputc('(');
> -     ++n;
> +     n += vtputs("(", 0, n);
>       for (md = 0; ; ) {
> -             n += vtputs(bp->b_modes[md]->p_name);
> +             n += vtputs(bp->b_modes[md]->p_name, 0, n);
>               if (++md > bp->b_nmodes)
>                       break;
> -             vtputc('-');
> -             ++n;
> +             n += vtputs("-", 0, n);
>       }
>       /* XXX These should eventually move to a real mode */
>       if (macrodef == TRUE)
> -             n += vtputs("-def");
> +             n += vtputs("-def", 0, n);
>       if (globalwd == TRUE)
> -             n += vtputs("-gwd");
> -     vtputc(')');
> -     ++n;
> +             n += vtputs("-gwd", 0, n);
> +     n += vtputs(")", 0, n);
>  
>       if (linenos && colnos)
>               len = snprintf(sl, sizeof(sl), "--L%d--C%d", wp->w_dotline,
> @@ -856,27 +846,132 @@ modeline(struct mgwin *wp, int modelinec
>       else if (colnos)
>               len = snprintf(sl, sizeof(sl), "--C%d", getcolpos(wp));
>       if ((linenos || colnos) && len < sizeof(sl) && len != -1)
> -             n += vtputs(sl);
> +             n += vtputs(sl, 0, n);
>  
>       while (n < ncol) {                      /* Pad out.              */
> -             vtputc('-');
> -             ++n;
> +             n += vtputs("-", 0, n);
>       }
>  }
>  
>  /*
> - * Output a string to the mode line, report how long it was.
> + * Output a string to the mode line, report how long it was,
> + * dealing with long lines and the display of unprintable
> + * things like control characters. Also expand tabs every 8
> + * columns. This code only puts printing characters into
> + * the virtual display image. Special care must be taken when
> + * expanding tabs. On a screen whose width is not a multiple
> + * of 8, it is possible for the virtual cursor to hit the
> + * right margin before the next tab stop is reached. This
> + * makes the tab code loop if you are not careful.
> + * Three guesses how we found this.
>   */
>  int
> -vtputs(const char *s)
> +vtputs(const char *s, const size_t max_bytes, const size_t initial_col)
>  {
> -     int n = 0;
> +     const unsigned char *us = (const unsigned char *) s;
> +     struct video *vp = vscreen[vtrow];
> +     size_t bytes_handled = 0;
> +     size_t last_full_byte_start = vtcol;
> +     size_t space_printed = 0;
> +
> +     if (!s) {
> +             return (0);
> +     }
> +
> +     while (*us && (!max_bytes || bytes_handled < max_bytes)) {
> +             if (space_printed + initial_col >= ncol) {
> +                     break;
> +             } else if (*us == '\t'
> +#ifdef       NOTAB
> +                        && !(curbp->b_flag & BFNOTAB)
> +#endif
> +                        ) {
> +                     last_full_byte_start = vtcol;
> +                     do {
> +                             if (vtcol >= 0) {
> +                                     last_full_byte_start = vtcol;
> +                                     vp->v_text[vtcol] = ' ';
> +                             }
> +                             vtcol++;
> +                             space_printed++;
> +                     } while (space_printed + initial_col < ncol &&
> +                              ((space_printed + initial_col) & 0x07));
> +                     us++;
> +                     bytes_handled++;
> +             } else if (ISCTRL(*us)) {

Won't this interpret multi-byte characters and only look at the first byte?
It seems you'll need some mbtrtowc action in this function, too.

> +                     last_full_byte_start = vtcol;
> +                     if (vtcol >= 0) {
> +                             vp->v_text[vtcol] = '^';
> +                     }
> +                     vtcol++;
> +                     if (vtcol >= 0) {
> +                             vp->v_text[vtcol] = CCHR(*us);
> +                     }
> +                     vtcol++;
> +                     bytes_handled++;
> +                     space_printed += 2;
> +                     us++;
> +             } else if (isprint(*us)) {

Same problem.

> +                     last_full_byte_start = vtcol;
> +                     if (vtcol >= 0) {
> +                             vp->v_text[vtcol] = *us++;
> +                     }
> +                     vtcol++;
> +                     bytes_handled++;
> +                     space_printed++;
> +             } else if (!(curbp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumable = max_bytes ?
> +                             (max_bytes - bytes_handled) : -1;
> +                     size_t consumed = mbrtowc(&wc, (const char *)us,
> +                                               consumable, &mbs);
> +                     int width = -1;
> +                     last_full_byte_start = vtcol;
> +                     if (consumed < (size_t) -2) {

Same mbrtowc return value handling problem.

> +                             width = wcwidth(wc);
> +                     }

Check width == -1 here?

> +                     if (width >= 0) {
> +                             bytes_handled += consumed;
> +                             space_printed += width;
> +                             do {
> +                                     if (vtcol >= 0) {
> +                                             vp->v_text[vtcol] = *us++;
> +                                     }
> +                                     vtcol++;
> +                             } while (--consumed);
> +                     } else {

Cause otherwise this block runs with a non-printable multi-byte
character. Is that what you want?

> +                             char bf[5];
> +                             snprintf(bf, sizeof(bf), "\\%o", *us);
> +                             bytes_handled++;
> +                             space_printed += vtputs(bf, 0,
> +                                 space_printed + initial_col);
> +                             us++;
> +                     }
> +             } else {
> +                     char bf[5];
> +                     last_full_byte_start = vtcol;
> +                     snprintf(bf, sizeof(bf), "\\%o", *us);
> +                     bytes_handled++;
> +                     space_printed += vtputs(bf, 0,
> +                         space_printed + initial_col);
> +                     us++;
> +             }
> +     }
>  
> -     while (*s != '\0') {
> -             vtputc(*s++);
> -             ++n;
> +     if ((space_printed + initial_col > ncol) ||
> +         (space_printed + initial_col == ncol &&
> +         (*us && (!max_bytes || bytes_handled < max_bytes)))) {
> +             vp->v_text[last_full_byte_start] = '$';
> +             while (++last_full_byte_start <= vtcol) {
> +                     vp->v_text[last_full_byte_start] = ' ';
> +             }
> +             bytes_handled++;
> +             space_printed++;
> +             us++;
>       }
> -     return (n);
> +
> +     return (space_printed);
>  }
>  
>  /*
> @@ -894,11 +989,11 @@ hash(struct video *vp)
>       char   *s;
>  
>       if ((vp->v_flag & VFHBAD) != 0) {       /* Hash bad.             */
> -             s = &vp->v_text[ncol - 1];
> -             for (i = ncol; i != 0; --i, --s)
> +             s = &vp->v_text[ncol * MB_CUR_MAX - 1];
> +             for (i = ncol * MB_CUR_MAX; i != 0; --i, --s)
>                       if (*s != ' ')
>                               break;
> -             n = ncol - i;                   /* Erase cheaper?        */
> +             n = ncol * MB_CUR_MAX - i;                      /* Erase 
> cheaper?        */
>               if (n > tceeol)
>                       n = tceeol;
>               vp->v_cost = i + n;             /* Bytes + blanks.       */
> Index: echo.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/echo.c,v
> retrieving revision 1.66
> diff -u -p -u -p -r1.66 echo.c
> --- echo.c    24 Oct 2016 17:18:42 -0000      1.66
> +++ echo.c    28 May 2018 18:08:10 -0000

It looks like the changes made to this file could be split off into
a separate diff that could be reviewed ahead of any changes for UTF-8.

> @@ -844,9 +844,11 @@ ewprintf(const char *fmt, ...)
>   * %k prints the name of the current key (and takes no arguments).
>   * %d prints a decimal integer
>   * %o prints an octal integer
> + * %x prints a hexadecimal integer
>   * %p prints a pointer
>   * %s prints a string
>   * %ld prints a long word
> + * %lx prints a hexadecimal long word
>   * Anything else is echoed verbatim
>   */
>  static void
> @@ -885,6 +887,10 @@ eformat(const char *fp, va_list ap)
>                               eputi(va_arg(ap, int), 8);
>                               break;
>  
> +                     case 'x':
> +                             eputi(va_arg(ap, int), 16);
> +                             break;
> +
>                       case 'p':
>                               snprintf(tmp, sizeof(tmp), "%p",
>                                   va_arg(ap, void *));
> @@ -902,6 +908,9 @@ eformat(const char *fp, va_list ap)
>                               case 'd':
>                                       eputl(va_arg(ap, long), 10);
>                                       break;
> +                             case 'x':
> +                                     eputl(va_arg(ap, long), 16);
> +                                     break;
>                               default:
>                                       eputc(c);
>                                       break;
> @@ -939,6 +948,7 @@ static void
>  eputl(long l, int r)
>  {
>       long     q;
> +     int      c;
>  
>       if (l < 0) {
>               eputc('-');
> @@ -946,7 +956,10 @@ eputl(long l, int r)
>       }
>       if ((q = l / r) != 0)
>               eputl(q, r);
> -     eputc((int)(l % r) + '0');
> +     c = (int)(l % r) + '0';
> +     if (c > '9')
> +             c += 'a' - '9' - 1;
> +     eputc(c);
>  }
>  
>  /*
> Index: fileio.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/fileio.c,v
> retrieving revision 1.105
> diff -u -p -u -p -r1.105 fileio.c
> --- fileio.c  13 Apr 2018 14:11:37 -0000      1.105
> +++ fileio.c  28 May 2018 18:08:10 -0000
> @@ -1,4 +1,4 @@
> -/*   $OpenBSD: fileio.c,v 1.105 2018/04/13 14:11:37 florian Exp $    */
> +/*   $OpenBSD: fileio.c,v 1.104 2017/05/30 07:05:22 florian Exp $    */
>  
>  /* This file is in the public domain. */
>  

What are the changes in this file for? Are they unrelated changes?
Looks like they're backing out the r1.105 commit? Why?

> @@ -722,12 +722,15 @@ expandtilde(const char *fn)
>                       return (NULL);
>               return(ret);
>       }
> -     if (ulen == 0) /* ~/ or ~ */
> -             pw = getpwuid(geteuid());
> -     else { /* ~user/ or ~user */
> +     pw = getpwuid(geteuid());
> +     if (ulen == 0) { /* ~/ or ~ */
> +             if (pw != NULL)
> +                     (void)strlcpy(user, pw->pw_name, sizeof(user));
> +             else
> +                     user[0] = '\0';
> +     } else { /* ~user/ or ~user */
>               memcpy(user, &fn[1], ulen);
>               user[ulen] = '\0';
> -             pw = getpwnam(user);
>       }
>       if (pw != NULL) {
>               plen = strlcpy(path, pw->pw_dir, sizeof(path));
> Index: funmap.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/funmap.c,v
> retrieving revision 1.53
> diff -u -p -u -p -r1.53 funmap.c
> --- funmap.c  14 Apr 2016 17:05:32 -0000      1.53
> +++ funmap.c  28 May 2018 18:08:10 -0000
> @@ -114,6 +114,7 @@ static struct funmap functnames[] = {
>       {bufferinsert, "insert-buffer",},
>       {fileinsert, "insert-file",},
>       {fillword, "insert-with-wrap",},
> +     {insert_char, "insert-char",},
>       {backisearch, "isearch-backward",},
>       {forwisearch, "isearch-forward",},
>       {joinline, "join-line",},
> @@ -191,6 +192,7 @@ static struct funmap functnames[] = {
>       {shellcommand, "shell-command",},
>       {piperegion, "shell-command-on-region",},
>       {shrinkwind, "shrink-window",},
> +     {show_raw_mode, "show-raw-mode",},
>  #ifdef NOTAB
>       {space_to_tabstop, "space-to-tabstop",},
>  #endif /* NOTAB */
> Index: kbd.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/kbd.c,v
> retrieving revision 1.30
> diff -u -p -u -p -r1.30 kbd.c
> --- kbd.c     26 Sep 2015 21:51:58 -0000      1.30
> +++ kbd.c     28 May 2018 18:08:10 -0000
> @@ -9,6 +9,8 @@
>  #include <sys/queue.h>
>  #include <signal.h>
>  #include <stdio.h>
> +#include <stdlib.h>
> +#include <wchar.h>
>  
>  #include "def.h"
>  #include "kbd.h"
> @@ -403,6 +405,43 @@ quote(int f, int n)
>                       ungetkey(c);
>       }
>       return (selfinsert(f, n));
> +}
> +
> +/*
> + * Prompt for a codepoint in whatever the native system's encoding is,
> + * insert it into the file
> + */
> +int
> +insert_char(int f, int n)
> +{
> +     char *bufp;
> +     char inpbuf[32];
> +     wchar_t wc;
> +     char mb[MB_CUR_MAX + 1];

The + 1 here is not needed, unless you intend to use mb as
a NUL terminated string. Which doesn't seem to be the case
because the loop below is bound by mbslen.

> +     mbstate_t mbs = { 0 };
> +     size_t mbslen;
> +     size_t i;
> +
> +     if ((bufp = eread("Insert character (hex): ", inpbuf, sizeof inpbuf,
> +          EFNEW)) == NULL) {
> +             return (ABORT);
> +     } else if (bufp[0] == '\0') {
> +             return (FALSE);
> +     }
> +
> +     wc = (wchar_t) strtoll(bufp, NULL, 16);
> +     mbslen = wcrtomb(mb, wc, &mbs);
> +     if (mbslen == (size_t) -1) {
> +             return (FALSE);
> +     }

In case you wanted mb to be NUL terminated that would have to be done here.
wcrtomb(3) will write at most MB_CUR_MAX bytes.

> +
> +     for (i = 0; i < mbslen; ++i) {
> +             if (linsert(1, mb[i]) == FALSE) {
> +                     return (FALSE);
> +             }
> +     }
> +
> +     return (TRUE);
>  }
>  
>  /*
> Index: keymap.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/keymap.c,v
> retrieving revision 1.58
> diff -u -p -u -p -r1.58 keymap.c
> --- keymap.c  29 Dec 2015 19:44:32 -0000      1.58
> +++ keymap.c  28 May 2018 18:08:10 -0000
> @@ -120,6 +120,21 @@ static struct KEYMAPE (2) cX4map = {
>       }
>  };
>  
> +static PF cX8J[] = {
> +     insert_char             /* ^M */
> +};
> +
> +static struct KEYMAPE (1) cX8map = {
> +     1,
> +     1,
> +     rescan,
> +     {
> +             {
> +                     CCHR('M'), CCHR('M'), cX8J, NULL
> +             }
> +     }
> +};
> +
>  static PF cXcB[] = {
>       listbuffers,            /* ^B */
>       quit,                   /* ^C */
> @@ -158,6 +173,10 @@ static PF cX0[] = {
>       NULL                    /* 4 */
>  };
>  
> +static PF cX8[] = {
> +     NULL                    /* 4 */
> +};
> +
>  static PF cXeq[] = {
>       showcpos                /* = */
>  };
> @@ -189,9 +208,9 @@ static PF cXcar[] = {
>       undo                    /* u */
>  };
>  
> -struct KEYMAPE (6) cXmap = {
> -     6,
> -     6,
> +struct KEYMAPE (7) cXmap = {
> +     7,
> +     7,
>       rescan,
>       {
>               {
> @@ -207,6 +226,9 @@ struct KEYMAPE (6) cXmap = {
>                       '0', '4', cX0, (KEYMAP *) & cX4map
>               },
>               {
> +                     '8', '8', cX8, (KEYMAP *) & cX8map
> +             },
> +             {
>                       '=', '=', cXeq, NULL
>               },
>               {
> @@ -491,6 +513,18 @@ static struct KEYMAPE (1) overwmap = {
>       }
>  };
>  
> +static struct KEYMAPE (1) rawmap = {
> +     0,
> +     1,              /* 1 to avoid 0 sized array */
> +     rescan,
> +     {
> +             /* unused dummy entry for VMS C */
> +             {
> +                     (KCHAR)0, (KCHAR)0, NULL, NULL
> +             }
> +     }
> +};
> +
>  
>  /*
>   * The basic (root) keyboard map
> @@ -513,6 +547,7 @@ static struct maps_s map_table[] = {
>       {(KEYMAP *) &notabmap, "notab",},
>  #endif /* NOTAB */
>       {(KEYMAP *) &overwmap, "overwrite",},
> +     {(KEYMAP *) &rawmap, "raw",},
>       {(KEYMAP *) &metamap, "esc prefix",},
>       {(KEYMAP *) &cXmap, "c-x prefix",},
>       {(KEYMAP *) &cX4map, "c-x 4 prefix",},
> Index: mg.1
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/mg.1,v
> retrieving revision 1.106
> diff -u -p -u -p -r1.106 mg.1
> --- mg.1      11 Dec 2017 07:27:07 -0000      1.106
> +++ mg.1      28 May 2018 18:08:10 -0000
> @@ -1,7 +1,7 @@
>  .\"  $OpenBSD: mg.1,v 1.106 2017/12/11 07:27:07 jmc Exp $
>  .\" This file is in the public domain.
>  .\"
> -.Dd $Mdocdate: December 11 2017 $
> +.Dd $Mdocdate: May 28 2018 $
>  .Dt MG 1
>  .Os
>  .Sh NAME
> @@ -849,7 +849,7 @@ This is the default.
>  .It set-default-mode
>  Append the supplied mode to the list of default modes
>  used by subsequent buffer creation.
> -Built in modes include: fill, indent and overwrite.
> +Built in modes include: fill, indent, overwrite, and raw.
>  .It set-fill-column
>  Prompt the user for a fill column.
>  Used by auto-fill-mode.
> @@ -861,6 +861,8 @@ Sets the prefix string to be used by the
>  Execute external command from mini-buffer.
>  .It shell-command-on-region
>  Provide the text in region to the shell command as input.
> +.It show-raw-mode
> +ASCII encoding mode.
>  .It shrink-window
>  Shrink current window by one line.
>  The window immediately below is expanded to pick up the slack.
> @@ -1091,5 +1093,3 @@ In order to use 8-bit characters (such a
>  needs to be disabled via the
>  .Dq meta-key-mode
>  command.
> -.Pp
> -Multi-byte character sets, such as UTF-8, are not supported.
> Index: modes.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/modes.c,v
> retrieving revision 1.21
> diff -u -p -u -p -r1.21 modes.c
> --- modes.c   30 May 2017 07:05:22 -0000      1.21
> +++ modes.c   28 May 2018 18:08:10 -0000
> @@ -111,6 +111,23 @@ overwrite_mode(int f, int n)
>  }
>  
>  int
> +show_raw_mode(int f, int n)
> +{
> +     if (changemode(f, n, "raw") == FALSE)
> +             return (FALSE);
> +     if (f & FFARG) {
> +             if (n <= 0)
> +                     curbp->b_flag &= ~BFSHOWRAW;
> +             else
> +                     curbp->b_flag |= BFSHOWRAW;
> +     } else
> +             curbp->b_flag ^= BFSHOWRAW;
> +
> +     sgarbf = TRUE;
> +     return (TRUE);
> +}
> +
> +int
>  set_default_mode(int f, int n)
>  {
>       int      i;
> @@ -170,5 +187,11 @@ set_default_mode(int f, int n)
>                       defb_flag |= BFNOTAB;
>       }
>  #endif       /* NOTAB */
> +     if (strcmp(modebuf, "raw") == 0) {
> +             if (n <= 0)
> +                     defb_flag &= ~BFSHOWRAW;
> +             else
> +                     defb_flag |= BFSHOWRAW;
> +     }
>       return (TRUE);
>  }
> Index: util.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/mg/util.c,v
> retrieving revision 1.38
> diff -u -p -u -p -r1.38 util.c
> --- util.c    18 Nov 2015 18:21:06 -0000      1.38
> +++ util.c    28 May 2018 18:08:10 -0000
> @@ -13,6 +13,9 @@
>  #include <ctype.h>
>  #include <signal.h>
>  #include <stdio.h>
> +#include <string.h>
> +#include <stdlib.h>
> +#include <wchar.h>
>  
>  #include "def.h"
>  
> @@ -33,6 +36,9 @@ showcpos(int f, int n)
>       int      nline, row;
>       int      cline, cbyte;          /* Current line/char/byte */
>       int      ratio;
> +     char     ismb = 0;
> +     wchar_t  wc = 0;
> +     char     mbc[MB_CUR_MAX + 1];
>  
>       /* collect the data */
>       clp = bfirstlp(curbp);
> @@ -69,8 +75,43 @@ showcpos(int f, int n)
>               clp = lforw(clp);
>       }
>       ratio = nchar ? (100L * cchar) / nchar : 100;
> -     ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d  col=%d",
> -         cbyte, cbyte, cchar, ratio, cline, row, getcolpos(curwp));
> +
> +     if (!(curbp->b_flag & BFSHOWRAW)) {
> +             mbstate_t mbs = { 0 };
> +             size_t consumed = 0;
> +             size_t offset = 0;
> +             while (cbyte != '\n' && offset <= curwp->w_doto) {
> +                     int c = lgetc(clp, curwp->w_doto - offset);
> +                     if (isprint(c) || (ISCTRL(c) != FALSE)) {
> +                             break;
> +                     }
> +                     consumed = mbrtowc(&wc,
> +                                        &clp->l_text[curwp->w_doto - offset],
> +                                        llength(clp) - curwp->w_doto + 
> offset,
> +                                        &mbs);
> +                     if (consumed < (size_t) -2) {

Again, please use a better mbrtowc() idiom.

> +                             ismb = (offset < consumed);
> +                             snprintf(mbc, consumed + 1, "%s",
> +                                      &clp->l_text[curwp->w_doto - offset]);
> +                             mbc[consumed + 1] = '\0';
> +                             break;
> +                     } else {
> +                             memset(&mbs, 0, sizeof mbs);
> +                     }
> +                     offset++;
> +             }
> +     }
> +
> +     if (ismb) {
> +             ewprintf("Char: %s (codepoint 0x%lx) Byte: %c (0%o)  "
> +                      "point=%ld(%d%%)  line=%d  row=%d col=%d", mbc,
> +                      (long) wc, cbyte, cbyte, cchar, ratio, cline, row,
> +                      getcolpos(curwp));
> +     } else {
> +             ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d"
> +                      "col=%d", cbyte, cbyte, cchar, ratio, cline, row,
> +                      getcolpos(curwp));
> +     }
>       return (TRUE);
>  }
>  
> @@ -96,6 +137,22 @@ getcolpos(struct mgwin *wp)
>                       col += 2;
>               else if (isprint(c)) {
>                       col++;
> +             } else if (!(wp->w_bufp->b_flag & BFSHOWRAW)) {
> +                     mbstate_t mbs = { 0 };
> +                     wchar_t wc = 0;
> +                     size_t consumed = mbrtowc(&wc, &wp->w_dotp->l_text[i],
> +                                               llength(wp->w_dotp) - i,
> +                                               &mbs);
> +                     int width = -1;
> +                     if (consumed < (size_t) -2) {

Same.

> +                             width = wcwidth(wc);
> +                     }

width == -1 ?

> +                     if (width >= 0) {
> +                             col += width;
> +                             i += (consumed - 1);
> +                     } else {
> +                             col += snprintf(tmp, sizeof(tmp), "\\%o", c);

snprintf can return -1

> +                     }
>               } else {
>                       col += snprintf(tmp, sizeof(tmp), "\\%o", c);

snprintf can return -1

>               }

Reply via email to