The following reply was made to PR bin/175418; it has been noted by GNATS.

From: Brooks Davis <bro...@freebsd.org>
To: "J.R. Oldroyd" <f...@opal.com>
Cc: freebsd-gnats-sub...@freebsd.org
Subject: Re: bin/175418: update vis(3) and vis(1) to support multibyte
 characters
Date: Fri, 18 Jan 2013 16:40:16 -0600

 On Fri, Jan 18, 2013 at 03:12:30PM -0500, J.R. Oldroyd wrote:
 > The vis(3) library calls and the vis(1) program do not support multibyte
 > character sets.  As a result many printable characters are not displayed
 > properly and vice-versa.  This patch enhances vis(3) to support multibyte
 > characters according to the setting of LC_CTYPE and also adjusts vis(1)
 > so that it reads input in multibyte aware manner.
 
 Thank you for your submission.  In a case of lousy timing, I merged the
 replacement of our vis(3) implementation with NetBSD's to stable/9 four
 days ago.  Any changes now need to go through NetBSD.  The good news is
 that we share a common heritage so much of your patch may still apply (I
 haven't tried).
 
 -- Brooks
 
 > 
 > Since vis(3) is also used by ps(1), this patch fixes ps(1) so that wide
 > characters in command arguments are displayed properly.
 > >How-To-Repeat:
 > n/a
 > >Fix:
 > --- lib/libc/gen/vis.c.orig  2013-01-02 19:26:41.000000000 -0500
 > +++ lib/libc/gen/vis.c       2013-01-17 14:45:55.000000000 -0500
 > @@ -35,167 +35,233 @@
 >  
 >  #include <sys/types.h>
 >  #include <limits.h>
 > +#include <stdlib.h>
 > +#include <wchar.h>
 > +#include <wctype.h>
 > +#include <string.h>
 >  #include <ctype.h>
 >  #include <stdio.h>
 >  #include <vis.h>
 >  
 > -#define     isoctal(c)      (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
 > +#define     iswoctal(c)     (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
 >  
 >  /*
 > - * vis - visually encode characters
 > + * _vis - visually encode wide characters
 >   */
 > -char *
 > -vis(dst, c, flag, nextc)
 > -    char *dst;
 > -    int c, nextc;
 > +wchar_t *
 > +_vis(dst, c, flag, nextc)
 > +    wchar_t *dst;
 > +    wint_t c, nextc;
 >      int flag;
 >  {
 > -    c = (unsigned char)c;
 > -
 >      if (flag & VIS_HTTPSTYLE) {
 >              /* Described in RFC 1808 */
 > -            if (!(isalnum(c) /* alpha-numeric */
 > +            if (!(iswalnum(c) /* alpha-numeric */
 >                  /* safe */
 > -                || c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
 > +                || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == 
 > L'+'
 >                  /* extra */
 > -                || c == '!' || c == '*' || c == '\'' || c == '('
 > -                || c == ')' || c == ',')) {
 > -                    *dst++ = '%';
 > -                    snprintf(dst, 4, (c < 16 ? "0%X" : "%X"), c);
 > +                || c == L'!' || c == L'*' || c == L'\'' || c == L'('
 > +                || c == L')' || c == L',')) {
 > +                    *dst++ = L'%';
 > +                    swprintf(dst, 4, (c < 16 ? L"0%X" : L"%X"), c);
 >                      dst += 2;
 >                      goto done;
 >              }
 >      }
 >  
 >      if ((flag & VIS_GLOB) &&
 > -        (c == '*' || c == '?' || c == '[' || c == '#'))
 > +        (c == L'*' || c == L'?' || c == L'[' || c == L'#'))
 >              ;
 > -    else if (isgraph(c) ||
 > -       ((flag & VIS_SP) == 0 && c == ' ') ||
 > -       ((flag & VIS_TAB) == 0 && c == '\t') ||
 > -       ((flag & VIS_NL) == 0 && c == '\n') ||
 > -       ((flag & VIS_SAFE) && (c == '\b' || c == '\007' || c == '\r'))) {
 > +    else if (iswgraph(c) ||
 > +       ((flag & VIS_SP) == 0 && c == L' ') ||
 > +       ((flag & VIS_TAB) == 0 && c == L'\t') ||
 > +       ((flag & VIS_NL) == 0 && c == L'\n') ||
 > +       ((flag & VIS_SAFE) && (c == L'\b' || c == L'\007' || c == L'\r'))) {
 >              *dst++ = c;
 > -            if (c == '\\' && (flag & VIS_NOSLASH) == 0)
 > -                    *dst++ = '\\';
 > -            *dst = '\0';
 > -            return (dst);
 > +            if (c == L'\\' && (flag & VIS_NOSLASH) == 0)
 > +                    *dst++ = L'\\';
 > +            goto done;
 >      }
 >  
 >      if (flag & VIS_CSTYLE) {
 >              switch(c) {
 > -            case '\n':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'n';
 > -                    goto done;
 > -            case '\r':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'r';
 > -                    goto done;
 > -            case '\b':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'b';
 > -                    goto done;
 > -            case '\a':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'a';
 > -                    goto done;
 > -            case '\v':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'v';
 > -                    goto done;
 > -            case '\t':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 't';
 > -                    goto done;
 > -            case '\f':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 'f';
 > -                    goto done;
 > -            case ' ':
 > -                    *dst++ = '\\';
 > -                    *dst++ = 's';
 > -                    goto done;
 > -            case '\0':
 > -                    *dst++ = '\\';
 > -                    *dst++ = '0';
 > -                    if (isoctal(nextc)) {
 > -                            *dst++ = '0';
 > -                            *dst++ = '0';
 > +            case L'\n':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'n';
 > +                    goto done;
 > +            case L'\r':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'r';
 > +                    goto done;
 > +            case L'\b':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'b';
 > +                    goto done;
 > +            case L'\a':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'a';
 > +                    goto done;
 > +            case L'\v':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'v';
 > +                    goto done;
 > +            case L'\t':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L't';
 > +                    goto done;
 > +            case L'\f':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'f';
 > +                    goto done;
 > +            case L' ':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L's';
 > +                    goto done;
 > +            case L'\0':
 > +                    *dst++ = L'\\';
 > +                    *dst++ = L'0';
 > +                    if (iswoctal(nextc)) {
 > +                            *dst++ = L'0';
 > +                            *dst++ = L'0';
 >                      }
 >                      goto done;
 >              }
 >      }
 > -    if (((c & 0177) == ' ') || isgraph(c) || (flag & VIS_OCTAL)) {
 > -            *dst++ = '\\';
 > -            *dst++ = ((u_char)c >> 6 & 07) + '0';
 > -            *dst++ = ((u_char)c >> 3 & 07) + '0';
 > -            *dst++ = ((u_char)c & 07) + '0';
 > +    if (((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
 > +            *dst++ = L'\\';
 > +            *dst++ = ((u_char)c >> 6 & 07) + L'0';
 > +            *dst++ = ((u_char)c >> 3 & 07) + L'0';
 > +            *dst++ = ((u_char)c & 07) + L'0';
 >              goto done;
 >      }
 >      if ((flag & VIS_NOSLASH) == 0)
 > -            *dst++ = '\\';
 > +            *dst++ = L'\\';
 >      if (c & 0200) {
 >              c &= 0177;
 > -            *dst++ = 'M';
 > +            *dst++ = L'M';
 >      }
 > -    if (iscntrl(c)) {
 > -            *dst++ = '^';
 > +    if (iswcntrl(c)) {
 > +            *dst++ = L'^';
 >              if (c == 0177)
 > -                    *dst++ = '?';
 > +                    *dst++ = L'?';
 >              else
 > -                    *dst++ = c + '@';
 > +                    *dst++ = c + L'@';
 >      } else {
 > -            *dst++ = '-';
 > +            *dst++ = L'-';
 >              *dst++ = c;
 >      }
 >  done:
 > -    *dst = '\0';
 > +    *dst = L'\0';
 >      return (dst);
 >  }
 >  
 >  /*
 > + * vis - visually encode characters
 > + */
 > +char *
 > +vis(dst, c, flag, nextc)
 > +    char *dst;
 > +    int c, nextc;
 > +    int flag;
 > +{
 > +    /*
 > +     * Output may be up to 4 times the size of input plus
 > +     * 1 for the NUL.
 > +     */
 > +    wchar_t res[5];
 > +
 > +    _vis(res, (wint_t) c, flag, (wint_t) nextc);
 > +    wcstombs(dst, res, wcslen(res)+sizeof(wchar_t));
 > +    return (dst + strlen(dst));
 > +}
 > +
 > +/*
 >   * strvis, strvisx - visually encode characters from src into dst
 >   *
 >   *  Dst must be 4 times the size of src to account for possible
 >   *  expansion.  The length of dst, not including the trailing NUL,
 >   *  is returned.
 >   *
 > - *  Strvisx encodes exactly len bytes from src into dst.
 > + *  Strvisx encodes exactly len characters from src into dst.
 >   *  This is useful for encoding a block of data.
 >   */
 >  int
 > -strvis(dst, src, flag)
 > -    char *dst;
 > -    const char *src;
 > +strvis(mbdst, mbsrc, flag)
 > +    char *mbdst;
 > +    const char *mbsrc;
 >      int flag;
 >  {
 > -    char c;
 > -    char *start;
 > +    wchar_t *dst, *src;
 > +    wchar_t *pdst, *psrc;
 > +    wchar_t c;
 > +    wchar_t *start;
 > +
 > +    if ((psrc = (wchar_t *) calloc((strlen(mbsrc) + 1),
 > +        sizeof(wchar_t))) == NULL)
 > +            return -1;
 > +    if ((pdst = (wchar_t *) calloc(((4 * strlen(mbsrc)) + 1),
 > +        sizeof(wchar_t))) == NULL) {
 > +            free((void *) psrc);
 > +            return -1;
 > +    }
 > +
 > +    dst = pdst;
 > +    src = psrc;
 > +
 > +    mbstowcs(src, mbsrc, strlen(mbsrc) + 1);
 >  
 >      for (start = dst; (c = *src); )
 > -            dst = vis(dst, c, flag, *++src);
 > -    *dst = '\0';
 > +            dst = _vis(dst, c, flag, *++src);
 > +
 > +    wcstombs(mbdst, start, dst - start + sizeof(wchar_t));
 > +
 > +    free((void *) pdst);
 > +    free((void *) psrc);
 > +
 >      return (dst - start);
 >  }
 >  
 >  int
 > -strvisx(dst, src, len, flag)
 > -    char *dst;
 > -    const char *src;
 > -    size_t len;
 > +strvisx(mbdst, mbsrc, mblen, flag)
 > +    char *mbdst;
 > +    const char *mbsrc;
 > +    size_t mblen;
 >      int flag;
 >  {
 > -    int c;
 > -    char *start;
 > +    wchar_t *dst, *src;
 > +    wchar_t *pdst, *psrc;
 > +    wchar_t c;
 > +    wchar_t *start;
 > +    size_t len;
 > +
 > +    if ((psrc = (wchar_t *) calloc((strlen(mbsrc) + 1),
 > +        sizeof(wchar_t))) == NULL)
 > +            return -1;
 > +    if ((pdst = (wchar_t *) calloc(((4 * strlen(mbsrc)) + 1),
 > +        sizeof(wchar_t))) == NULL) {
 > +            free((void *) psrc);
 > +            return -1;
 > +    }
 > +
 > +    dst = pdst;
 > +    src = psrc;
 >  
 > -    for (start = dst; len > 1; len--) {
 > +    len = mbstowcs(src, mbsrc, strlen(mbsrc) + 1);
 > +
 > +    if (len < mblen)
 > +            mblen = len;
 > +
 > +    for (start = dst; mblen > 1; mblen--) {
 >              c = *src;
 > -            dst = vis(dst, c, flag, *++src);
 > +            dst = _vis(dst, c, flag, *++src);
 >      }
 > -    if (len)
 > -            dst = vis(dst, *src, flag, '\0');
 > -    *dst = '\0';
 > +    if (mblen)
 > +            dst = _vis(dst, *src, flag, L'\0');
 > +
 > +    wcstombs(mbdst, start, dst - start + sizeof(wchar_t));
 > +
 > +    free((void *) pdst);
 > +    free((void *) psrc);
 >  
 >      return (dst - start);
 >  }
 > --- lib/libc/gen/vis.3.orig  2013-01-02 19:26:40.000000000 -0500
 > +++ lib/libc/gen/vis.3       2013-01-17 14:28:02.000000000 -0500
 > @@ -300,9 +300,6 @@
 >  .Sh HISTORY
 >  These functions first appeared in
 >  .Bx 4.4 .
 > -.Sh BUGS
 > -The
 > -.Nm
 > -family of functions do not recognize multibyte characters, and thus
 > -may consider them to be non-printable when they are in fact printable
 > -(and vice versa.)
 > +.Pp
 > +The functions were augmented to add multibyte character support in
 > +.Fx 9.1 .
 > --- usr.bin/vis/vis.c.orig   2013-01-02 19:15:19.000000000 -0500
 > +++ usr.bin/vis/vis.c        2013-01-16 20:21:54.000000000 -0500
 > @@ -45,6 +45,7 @@
 >  #include <locale.h>
 >  #include <stdio.h>
 >  #include <stdlib.h>
 > +#include <wchar.h>
 >  #include <unistd.h>
 >  #include <vis.h>
 >  
 > @@ -139,12 +140,12 @@
 >      static int col = 0;
 >      static char dummy[] = "\0";
 >      char *cp = dummy+1; /* so *(cp-1) starts out != '\n' */
 > -    int c, rachar;
 > +    wint_t c, rachar;
 >      char buff[5];
 >  
 > -    c = getc(fp);
 > +    c = getwc(fp);
 >      while (c != EOF) {
 > -            rachar = getc(fp);
 > +            rachar = getwc(fp);
 >              if (none) {
 >                      cp = buff;
 >                      *cp++ = c;
 > @@ -159,7 +160,7 @@
 >                      *cp++ = '\n';
 >                      *cp = '\0';
 >              } else
 > -                    (void) vis(buff, (char)c, eflags, (char)rachar);
 > +                    (void) vis(buff, c, eflags, rachar);
 >  
 >              cp = buff;
 >              if (fold) {
 > --- usr.bin/vis/vis.1.orig   2013-01-02 19:15:19.000000000 -0500
 > +++ usr.bin/vis/vis.1        2013-01-17 14:34:16.000000000 -0500
 > @@ -128,11 +128,11 @@
 >  .Nm
 >  command appeared in
 >  .Bx 4.4 .
 > -.Sh BUGS
 > -Due to limitations in the underlying
 > +.Pp
 > +The underlying
 >  .Xr vis 3
 > -function, the
 > +function was augmented to add multibyte character support in
 > +.Fx 9.1
 > +at which point the
 >  .Nm
 > -utility
 > -does not recognize multibyte characters, and thus may consider them to be
 > -non-printable when they are in fact printable (and vice versa).
 > +utility was also updated to be multibyte character aware.
 > >Release-Note:
 > >Audit-Trail:
 > >Unformatted:
 > _______________________________________________
 > freebsd-bugs@freebsd.org mailing list
 > http://lists.freebsd.org/mailman/listinfo/freebsd-bugs
 > To unsubscribe, send any mail to "freebsd-bugs-unsubscr...@freebsd.org"
 > 
_______________________________________________
freebsd-bugs@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-bugs
To unsubscribe, send any mail to "freebsd-bugs-unsubscr...@freebsd.org"

Reply via email to