Author: brooks
Date: Tue Apr 16 19:27:09 2013
New Revision: 249560
URL: http://svnweb.freebsd.org/changeset/base/249560

Log:
  MFC r248302:
  
  Update to the latest (un)vis(3) sources from NetBSD.  This adds
  multibyte support[0] and the new functions strenvisx and strsenvisx.
  
  Add MLINKS for vis(3) functions added by this change and by the initial
  import from NetBSD[1].
  
  PR:           bin/166364, bin/175418
  Submitted by: "J.R. Oldroyd" <f...@opal.com>[0]
                stefanf[1]
  Obtained from:        NetBSD

Modified:
  stable/9/contrib/libc-vis/unvis.3
  stable/9/contrib/libc-vis/unvis.c
  stable/9/contrib/libc-vis/vis.3
  stable/9/contrib/libc-vis/vis.c
  stable/9/contrib/libc-vis/vis.h
  stable/9/lib/libc/gen/Makefile.inc
  stable/9/lib/libc/gen/Symbol.map
Directory Properties:
  stable/9/contrib/libc-vis/   (props changed)
  stable/9/lib/libc/   (props changed)

Modified: stable/9/contrib/libc-vis/unvis.3
==============================================================================
--- stable/9/contrib/libc-vis/unvis.3   Tue Apr 16 19:25:41 2013        
(r249559)
+++ stable/9/contrib/libc-vis/unvis.3   Tue Apr 16 19:27:09 2013        
(r249560)
@@ -1,4 +1,4 @@
-.\"    $NetBSD: unvis.3,v 1.23 2011/03/17 14:06:29 wiz Exp $
+.\"    $NetBSD: unvis.3,v 1.27 2012/12/15 07:34:36 wiz Exp $
 .\"    $FreeBSD$
 .\"
 .\" Copyright (c) 1989, 1991, 1993
@@ -126,15 +126,17 @@ The
 function has several return codes that must be handled properly.
 They are:
 .Bl -tag -width UNVIS_VALIDPUSH
-.It Li \&0 (zero)
+.It Li \&0 No (zero)
 Another character is necessary; nothing has been recognized yet.
 .It Dv UNVIS_VALID
 A valid character has been recognized and is available at the location
-pointed to by cp.
+pointed to by
+.Fa cp .
 .It Dv UNVIS_VALIDPUSH
 A valid character has been recognized and is available at the location
-pointed to by cp; however, the character currently passed in should
-be passed in again.
+pointed to by
+.Fa cp ;
+however, the character currently passed in should be passed in again.
 .It Dv UNVIS_NOCHAR
 A valid sequence was detected, but no character was produced.
 This return code is necessary to indicate a logical break between characters.
@@ -150,7 +152,7 @@ one more time with flag set to
 to extract any remaining character (the character passed in is ignored).
 .Pp
 The
-.Ar flag
+.Fa flag
 argument is also used to specify the encoding style of the source.
 If set to
 .Dv VIS_HTTPSTYLE
@@ -161,7 +163,8 @@ will decode URI strings as specified in 
 If set to
 .Dv VIS_HTTP1866 ,
 .Fn unvis
-will decode URI strings as specified in RFC 1866.
+will decode entity references and numeric character references
+as specified in RFC 1866.
 If set to
 .Dv VIS_MIMESTYLE ,
 .Fn unvis
@@ -169,7 +172,9 @@ will decode MIME Quoted-Printable string
 If set to
 .Dv VIS_NOESCAPE ,
 .Fn unvis
-will not decode \e quoted characters.
+will not decode
+.Ql \e
+quoted characters.
 .Pp
 The following code fragment illustrates a proper use of
 .Fn unvis .
@@ -204,7 +209,7 @@ The functions
 and
 .Fn strnunvisx
 will return \-1 on error and set
-.Va errno 
+.Va errno
 to:
 .Bl -tag -width Er
 .It Bq Er EINVAL
@@ -212,7 +217,7 @@ An invalid escape sequence was detected,
 .El
 .Pp
 In addition the functions
-.Fn strnunvis 
+.Fn strnunvis
 and
 .Fn strnunvisx
 will can also set
@@ -244,4 +249,14 @@ and
 functions appeared in
 .Nx 6.0
 and
-.Fx 10.0 .
+.Fx 9.2 .
+.Sh BUGS
+The names
+.Dv VIS_HTTP1808
+and
+.Dv VIS_HTTP1866
+are wrong.
+Percent-encoding was defined in RFC 1738, the original RFC for URL.
+RFC 1866 defines HTML 2.0, an application of SGML, from which it
+inherits concepts of numeric character references and entity
+references.

Modified: stable/9/contrib/libc-vis/unvis.c
==============================================================================
--- stable/9/contrib/libc-vis/unvis.c   Tue Apr 16 19:25:41 2013        
(r249559)
+++ stable/9/contrib/libc-vis/unvis.c   Tue Apr 16 19:27:09 2013        
(r249560)
@@ -1,4 +1,4 @@
-/*     $NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $      */
+/*     $NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $  */
 
 /*-
  * Copyright (c) 1989, 1993
@@ -34,7 +34,7 @@
 #if 0
 static char sccsid[] = "@(#)unvis.c    8.1 (Berkeley) 6/4/93";
 #else
-__RCSID("$NetBSD: unvis.c,v 1.40 2012/12/14 21:31:01 christos Exp $");
+__RCSID("$NetBSD: unvis.c,v 1.41 2012/12/15 04:29:53 matt Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 __FBSDID("$FreeBSD$");
@@ -90,7 +90,7 @@ __weak_alias(strnunvisx,_strnunvisx)
  * RFC 1866
  */
 static const struct nv {
-       const char name[7];
+       char name[7];
        uint8_t value;
 } nv[] = {
        { "AElig",      198 }, /* capital AE diphthong (ligature)  */

Modified: stable/9/contrib/libc-vis/vis.3
==============================================================================
--- stable/9/contrib/libc-vis/vis.3     Tue Apr 16 19:25:41 2013        
(r249559)
+++ stable/9/contrib/libc-vis/vis.3     Tue Apr 16 19:27:09 2013        
(r249560)
@@ -1,4 +1,4 @@
-.\"    $NetBSD: vis.3,v 1.29 2012/12/14 22:55:59 christos Exp $
+.\"    $NetBSD: vis.3,v 1.39 2013/02/20 20:05:26 christos Exp $
 .\"    $FreeBSD$
 .\"
 .\" Copyright (c) 1989, 1991, 1993
@@ -30,7 +30,7 @@
 .\"
 .\"     @(#)vis.3      8.1 (Berkeley) 6/9/93
 .\"
-.Dd December 14, 2012
+.Dd February 19, 2013
 .Dt VIS 3
 .Os
 .Sh NAME
@@ -40,12 +40,14 @@
 .Nm strnvis ,
 .Nm strvisx ,
 .Nm strnvisx ,
+.Nm strenvisx ,
 .Nm svis ,
 .Nm snvis ,
 .Nm strsvis ,
 .Nm strsnvis ,
-.Nm strsvisx
-.Nm strsnvisx
+.Nm strsvisx ,
+.Nm strsnvisx ,
+.Nm strsenvisx
 .Nd visually encode characters
 .Sh LIBRARY
 .Lb libc
@@ -63,6 +65,8 @@
 .Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
 .Ft int
 .Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int 
flag"
+.Ft int
+.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int 
flag" "int *cerr_ptr"
 .Ft char *
 .Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
 .Ft char *
@@ -75,6 +79,8 @@
 .Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char 
*extra"
 .Ft int
 .Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int 
flag" "const char *extra"
+.Ft int
+.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int 
flag" "const char *extra" "int *cerr_ptr"
 .Sh DESCRIPTION
 The
 .Fn vis
@@ -89,11 +95,11 @@ needs no encoding, it is copied in unalt
 The string is null terminated, and a pointer to the end of the string is
 returned.
 The maximum length of any encoding is four
-characters (not including the trailing
+bytes (not including the trailing
 .Dv NUL ) ;
 thus, when
 encoding a set of characters into a buffer, the size of the buffer should
-be four times the number of characters encoded, plus one for the trailing
+be four times the number of bytes encoded, plus one for the trailing
 .Dv NUL .
 The flag parameter is used for altering the default range of
 characters considered for encoding and for altering the visual
@@ -142,16 +148,17 @@ terminate
 The size of
 .Fa dst
 must be four times the number
-of characters encoded from
+of bytes encoded from
 .Fa src
 (plus one for the
 .Dv NUL ) .
 Both
-forms return the number of characters in dst (not including
-the trailing
+forms return the number of characters in
+.Fa dst
+(not including the trailing
 .Dv NUL ) .
 The
-.Dq n
+.Dq Nm n
 versions of the functions also take an additional argument
 .Fa dlen
 that indicates the length of the
@@ -159,7 +166,7 @@ that indicates the length of the
 buffer.
 If
 .Fa dlen
-is not large enough to fix the converted string then the
+is not large enough to fit the converted string then the
 .Fn strnvis
 and
 .Fn strnvisx
@@ -167,6 +174,14 @@ functions return \-1 and set
 .Va errno
 to
 .Dv ENOSPC .
+The
+.Fn strenvisx
+function takes an additional argument,
+.Fa cerr_ptr ,
+that is used to pass in and out a multibyte conversion error flag.
+This is useful when processing single characters at a time when
+it is possible that the locale may be set to something other
+than the locale of the characters in the input data.
 .Pp
 The functions
 .Fn svis ,
@@ -174,16 +189,18 @@ The functions
 .Fn strsvis ,
 .Fn strsnvis ,
 .Fn strsvisx ,
+.Fn strsnvisx ,
 and
-.Fn strsnvisx
+.Fn strsenvisx
 correspond to
 .Fn vis ,
 .Fn nvis ,
 .Fn strvis ,
 .Fn strnvis ,
 .Fn strvisx ,
+.Fn strnvisx ,
 and
-.Fn strnvisx
+.Fn strenvisx
 but have an additional argument
 .Fa extra ,
 pointing to a
@@ -214,14 +231,13 @@ and
 .Fn strnvisx ) ,
 and the type of representation used.
 By default, all non-graphic characters,
-except space, tab, and newline are encoded.
-(See
-.Xr isgraph 3 . )
+except space, tab, and newline are encoded (see
+.Xr isgraph 3 ) .
 The following flags
 alter this:
 .Bl -tag -width VIS_WHITEX
 .It Dv VIS_GLOB
-Also encode magic characters
+Also encode the magic characters
 .Ql ( * ,
 .Ql \&? ,
 .Ql \&[
@@ -243,11 +259,13 @@ Synonym for
 \&|
 .Dv VIS_NL .
 .It Dv VIS_SAFE
-Only encode "unsafe" characters.
+Only encode
+.Dq unsafe
+characters.
 Unsafe means control characters which may cause common terminals to perform
 unexpected functions.
 Currently this form allows space, tab, newline, backspace, bell, and
-return - in addition to all graphic characters - unencoded.
+return \(em in addition to all graphic characters \(em unencoded.
 .El
 .Pp
 (The above flags have no effect for
@@ -287,8 +305,8 @@ Use an
 to represent meta characters (characters with the 8th
 bit set), and use caret
 .Ql ^
-to represent control characters see
-.Pf ( Xr iscntrl 3 ) .
+to represent control characters (see
+.Xr iscntrl 3 ) .
 The following formats are used:
 .Bl -tag -width xxxxx
 .It Dv \e^C
@@ -335,19 +353,20 @@ Use C-style backslash sequences to repre
 characters.
 The following sequences are used to represent the indicated characters:
 .Bd -unfilled -offset indent
-.Li \ea Tn  - BEL No (007)
-.Li \eb Tn  - BS No (010)
-.Li \ef Tn  - NP No (014)
-.Li \en Tn  - NL No (012)
-.Li \er Tn  - CR No (015)
-.Li \es Tn  - SP No (040)
-.Li \et Tn  - HT No (011)
-.Li \ev Tn  - VT No (013)
-.Li \e0 Tn  - NUL No (000)
+.Li \ea Tn  \(em BEL No (007)
+.Li \eb Tn  \(em BS No (010)
+.Li \ef Tn  \(em NP No (014)
+.Li \en Tn  \(em NL No (012)
+.Li \er Tn  \(em CR No (015)
+.Li \es Tn  \(em SP No (040)
+.Li \et Tn  \(em HT No (011)
+.Li \ev Tn  \(em VT No (013)
+.Li \e0 Tn  \(em NUL No (000)
 .Ed
 .Pp
-When using this format, the nextc parameter is looked at to determine
-if a
+When using this format, the
+.Fa nextc
+parameter is looked at to determine if a
 .Dv NUL
 character can be encoded as
 .Ql \e0
@@ -374,8 +393,8 @@ represents a lower case hexadecimal digi
 .It Dv VIS_MIMESTYLE
 Use MIME Quoted-Printable encoding as described in RFC 2045, only don't
 break lines and don't handle CRLF.
-The form is:
-.Ql %XX
+The form is
+.Ql =XX
 where
 .Em X
 represents an upper case hexadecimal digit.
@@ -392,6 +411,41 @@ meta characters as
 .Ql M-C ) .
 With this flag set, the encoding is
 ambiguous and non-invertible.
+.Sh MULTIBYTE CHARACTER SUPPORT
+These functions support multibyte character input.
+The encoding conversion is influenced by the setting of the
+.Ev LC_CTYPE
+environment variable which defines the set of characters
+that can be copied without encoding.
+.Pp
+When 8-bit data is present in the input,
+.Ev LC_CTYPE
+must be set to the correct locale or to the C locale.
+If the locales of the data and the conversion are mismatched,
+multibyte character recognition may fail and encoding will be performed
+byte-by-byte instead.
+.Pp
+As noted above,
+.Fa dst
+must be four times the number of bytes processed from
+.Fa src .
+But note that each multibyte character can be up to
+.Dv MB_LEN_MAX
+bytes
+.\" (see
+.\" .Xr multibyte 3 )
+so in terms of multibyte characters,
+.Fa dst
+must be four times
+.Dv MB_LEN_MAX
+times the number of characters processed from
+.Fa src .
+.Sh ENVIRONMENT
+.Bl -tag -width ".Ev LC_CTYPE"
+.It Ev LC_CTYPE
+Specify the locale of the input data.
+Set to C if the input data locale is unknown.
+.El
 .Sh ERRORS
 The functions
 .Fn nvis
@@ -407,11 +461,11 @@ and
 .Fn strsnvisx ,
 will return \-1 when the
 .Fa dlen
-destination buffer length size is not enough to perform the conversion while
+destination buffer size is not enough to perform the conversion while
 setting
 .Va errno
 to:
-.Bl -tag -width Er
+.Bl -tag -width ".Bq Er ENOSPC"
 .It Bq Er ENOSPC
 The destination buffer size is not large enough to perform the conversion.
 .El
@@ -419,18 +473,23 @@ The destination buffer size is not large
 .Xr unvis 1 ,
 .Xr vis 1 ,
 .Xr glob 3 ,
+.\" .Xr multibyte 3 ,
 .Xr unvis 3
 .Rs
 .%A T. Berners-Lee
 .%T Uniform Resource Locators (URL)
-.%O RFC1738
+.%O "RFC 1738"
+.Re
+.Rs
+.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet 
Message Bodies"
+.%O "RFC 2045"
 .Re
 .Sh HISTORY
 The
 .Fn vis ,
 .Fn strvis ,
 and
-.Fa strvisx
+.Fn strvisx
 functions first appeared in
 .Bx 4.4 .
 The
@@ -441,7 +500,7 @@ and
 functions appeared in
 .Nx 1.5
 and
-.Fx 10.0 .
+.Fx 9.2 .
 The buffer size limited versions of the functions
 .Po Fn nvis ,
 .Fn strnvis ,
@@ -451,6 +510,9 @@ The buffer size limited versions of the 
 and
 .Fn strsnvisx Pc
 appeared in
-.Nx 6.0
 and
-.Fx 10.0 .
+.Fx 9.2 .
+Multibyte character support was added in
+.Nx 7.0
+and
+.Fx 9.2 .

Modified: stable/9/contrib/libc-vis/vis.c
==============================================================================
--- stable/9/contrib/libc-vis/vis.c     Tue Apr 16 19:25:41 2013        
(r249559)
+++ stable/9/contrib/libc-vis/vis.c     Tue Apr 16 19:27:09 2013        
(r249560)
@@ -1,4 +1,4 @@
-/*     $NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $        */
+/*     $NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $   */
 
 /*-
  * Copyright (c) 1989, 1993
@@ -57,19 +57,23 @@
 
 #include <sys/cdefs.h>
 #if defined(LIBC_SCCS) && !defined(lint)
-__RCSID("$NetBSD: vis.c,v 1.45 2012/12/14 21:38:18 christos Exp $");
+__RCSID("$NetBSD: vis.c,v 1.60 2013/02/21 16:21:20 joerg Exp $");
 #endif /* LIBC_SCCS and not lint */
+#ifdef __FBSDID
 __FBSDID("$FreeBSD$");
+#define        _DIAGASSERT(x)  assert(x)
+#endif
 
 #include "namespace.h"
 #include <sys/types.h>
+#include <sys/param.h>
 
 #include <assert.h>
 #include <vis.h>
 #include <errno.h>
 #include <stdlib.h>
-
-#define        _DIAGASSERT(x)  assert(x)
+#include <wchar.h>
+#include <wctype.h>
 
 #ifdef __weak_alias
 __weak_alias(strvisx,_strvisx)
@@ -81,65 +85,66 @@ __weak_alias(strvisx,_strvisx)
 #include <stdio.h>
 #include <string.h>
 
-static char *do_svis(char *, size_t *, int, int, int, const char *);
+/*
+ * The reason for going through the trouble to deal with character encodings
+ * in vis(3), is that we use this to safe encode output of commands. This
+ * safe encoding varies depending on the character set. For example if we
+ * display ps output in French, we don't want to display French characters
+ * as M-foo.
+ */
+
+static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
 
 #undef BELL
-#define BELL '\a'
+#define BELL L'\a'
+
+#define iswoctal(c)    (((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
+#define iswwhite(c)    (c == L' ' || c == L'\t' || c == L'\n')
+#define iswsafe(c)     (c == L'\b' || c == BELL || c == L'\r')
+#define xtoa(c)                L"0123456789abcdef"[c]
+#define XTOA(c)                L"0123456789ABCDEF"[c]
 
-#define isoctal(c)     (((u_char)(c)) >= '0' && ((u_char)(c)) <= '7')
-#define iswhite(c)     (c == ' ' || c == '\t' || c == '\n')
-#define issafe(c)      (c == '\b' || c == BELL || c == '\r')
-#define xtoa(c)                "0123456789abcdef"[c]
-#define XTOA(c)                "0123456789ABCDEF"[c]
-
-#define MAXEXTRAS      9
-
-#define MAKEEXTRALIST(flag, extra, orig_str)                                 \
-do {                                                                         \
-       const char *orig = orig_str;                                          \
-       const char *o = orig;                                                 \
-       char *e;                                                              \
-       while (*o++)                                                          \
-               continue;                                                     \
-       extra = malloc((size_t)((o - orig) + MAXEXTRAS));                     \
-       if (!extra) break;                                                    \
-       for (o = orig, e = extra; (*e++ = *o++) != '\0';)                     \
-               continue;                                                     \
-       e--;                                                                  \
-       if (flag & VIS_GLOB) {                                                \
-               *e++ = '*';                                                   \
-               *e++ = '?';                                                   \
-               *e++ = '[';                                                   \
-               *e++ = '#';                                                   \
-       }                                                                     \
-       if (flag & VIS_SP) *e++ = ' ';                                        \
-       if (flag & VIS_TAB) *e++ = '\t';                                      \
-       if (flag & VIS_NL) *e++ = '\n';                                       \
-       if ((flag & VIS_NOSLASH) == 0) *e++ = '\\';                           \
-       *e = '\0';                                                            \
-} while (/*CONSTCOND*/0)
+#define MAXEXTRAS      10
+
+#if !HAVE_NBTOOL_CONFIG_H
+#ifndef __NetBSD__
+/*
+ * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
+ * integral type and is probably wrong, since currently the maximum
+ * number of bytes a character needs is 6. Until this is fixed, the
+ * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
+ * the assertion is commented out.
+ */
+#ifdef __FreeBSD__
+/*
+ * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
+ * mode.
+ */
+#ifndef CTASSERT
+#define CTASSERT(x)             _CTASSERT(x, __LINE__)
+#define _CTASSERT(x, y)         __CTASSERT(x, y)
+#define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
+#endif
+#endif /* __FreeBSD__ */
+CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
+#endif /* !__NetBSD__ */
+#endif
 
 /*
  * This is do_hvis, for HTTP style (RFC 1808)
  */
-static char *
-do_hvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
-
-       if ((isascii(c) && isalnum(c))
+       if (iswalnum(c)
            /* safe */
-           || c == '$' || c == '-' || c == '_' || c == '.' || c == '+'
+           || c == L'$' || c == L'-' || c == L'_' || c == L'.' || c == L'+'
            /* extra */
-           || c == '!' || c == '*' || c == '\'' || c == '(' || c == ')'
-           || c == ',') {
-               dst = do_svis(dst, dlen, c, flag, nextc, extra);
-       } else {
-               if (dlen) {
-                       if (*dlen < 3)
-                               return NULL;
-                       *dlen -= 3;
-               }
-               *dst++ = '%';
+           || c == L'!' || c == L'*' || c == L'\'' || c == L'(' || c == L')'
+           || c == L',')
+               dst = do_svis(dst, c, flags, nextc, extra);
+       else {
+               *dst++ = L'%';
                *dst++ = xtoa(((unsigned int)c >> 4) & 0xf);
                *dst++ = xtoa((unsigned int)c & 0xf);
        }
@@ -151,312 +156,448 @@ do_hvis(char *dst, size_t *dlen, int c, 
  * This is do_mvis, for Quoted-Printable MIME (RFC 2045)
  * NB: No handling of long lines or CRLF.
  */
-static char *
-do_mvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
 {
-       if ((c != '\n') &&
+       if ((c != L'\n') &&
            /* Space at the end of the line */
-           ((isspace(c) && (nextc == '\r' || nextc == '\n')) ||
+           ((iswspace(c) && (nextc == L'\r' || nextc == L'\n')) ||
            /* Out of range */
-           (!isspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
-           /* Specific char to be escaped */ 
-           strchr("#$@[\\]^`{|}~", c) != NULL)) {
-               if (dlen) {
-                       if (*dlen < 3)
-                               return NULL;
-                       *dlen -= 3;
-               }
-               *dst++ = '=';
+           (!iswspace(c) && (c < 33 || (c > 60 && c < 62) || c > 126)) ||
+           /* Specific char to be escaped */
+           wcschr(L"#$@[\\]^`{|}~", c) != NULL)) {
+               *dst++ = L'=';
                *dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
                *dst++ = XTOA((unsigned int)c & 0xf);
-       } else {
-               dst = do_svis(dst, dlen, c, flag, nextc, extra);
-       }
+       } else
+               dst = do_svis(dst, c, flags, nextc, extra);
        return dst;
 }
 
 /*
- * This is do_vis, the central code of vis.
- * dst:              Pointer to the destination buffer
- * c:        Character to encode
- * flag:      Flag word
- * nextc:     The character following 'c'
- * extra:     Pointer to the list of extra characters to be
- *           backslash-protected.
+ * Output single byte of multibyte character.
  */
-static char *
-do_svis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
 {
-       int isextra;
-       size_t odlen = dlen ? *dlen : 0;
-
-       isextra = strchr(extra, c) != NULL;
-#define HAVE(x) \
-       do { \
-               if (dlen) { \
-                       if (*dlen < (x)) \
-                               goto out; \
-                       *dlen -= (x); \
-               } \
-       } while (/*CONSTCOND*/0)
-       if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
-           ((flag & VIS_SAFE) && issafe(c)))) {
-               HAVE(1);
-               *dst++ = c;
-               return dst;
-       }
-       if (flag & VIS_CSTYLE) {
-               HAVE(2);
+       if (flags & VIS_CSTYLE) {
                switch (c) {
-               case '\n':
-                       *dst++ = '\\'; *dst++ = 'n';
+               case L'\n':
+                       *dst++ = L'\\'; *dst++ = L'n';
                        return dst;
-               case '\r':
-                       *dst++ = '\\'; *dst++ = 'r';
+               case L'\r':
+                       *dst++ = L'\\'; *dst++ = L'r';
                        return dst;
-               case '\b':
-                       *dst++ = '\\'; *dst++ = 'b';
+               case L'\b':
+                       *dst++ = L'\\'; *dst++ = L'b';
                        return dst;
                case BELL:
-                       *dst++ = '\\'; *dst++ = 'a';
+                       *dst++ = L'\\'; *dst++ = L'a';
                        return dst;
-               case '\v':
-                       *dst++ = '\\'; *dst++ = 'v';
+               case L'\v':
+                       *dst++ = L'\\'; *dst++ = L'v';
                        return dst;
-               case '\t':
-                       *dst++ = '\\'; *dst++ = 't';
+               case L'\t':
+                       *dst++ = L'\\'; *dst++ = L't';
                        return dst;
-               case '\f':
-                       *dst++ = '\\'; *dst++ = 'f';
+               case L'\f':
+                       *dst++ = L'\\'; *dst++ = L'f';
                        return dst;
-               case ' ':
-                       *dst++ = '\\'; *dst++ = 's';
+               case L' ':
+                       *dst++ = L'\\'; *dst++ = L's';
                        return dst;
-               case '\0':
-                       *dst++ = '\\'; *dst++ = '0';
-                       if (isoctal(nextc)) {
-                               HAVE(2);
-                               *dst++ = '0';
-                               *dst++ = '0';
+               case L'\0':
+                       *dst++ = L'\\'; *dst++ = L'0';
+                       if (iswoctal(nextc)) {
+                               *dst++ = L'0';
+                               *dst++ = L'0';
                        }
                        return dst;
                default:
-                       if (isgraph(c)) {
-                               *dst++ = '\\'; *dst++ = c;
+                       if (iswgraph(c)) {
+                               *dst++ = L'\\';
+                               *dst++ = c;
                                return dst;
                        }
-                       if (dlen)
-                               *dlen = odlen;
                }
        }
-       if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
-               HAVE(4);
-               *dst++ = '\\';
-               *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
-               *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
-               *dst++ =                             (c       & 07) + '0';
+       if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
+               *dst++ = L'\\';
+               *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
+               *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
+               *dst++ =                             (c       & 07) + L'0';
        } else {
-               if ((flag & VIS_NOSLASH) == 0) {
-                       HAVE(1);
-                       *dst++ = '\\';
-               }
+               if ((flags & VIS_NOSLASH) == 0)
+                       *dst++ = L'\\';
 
                if (c & 0200) {
-                       HAVE(1);
-                       c &= 0177; *dst++ = 'M';
+                       c &= 0177;
+                       *dst++ = L'M';
                }
 
-               if (iscntrl(c)) {
-                       HAVE(2);
-                       *dst++ = '^';
+               if (iswcntrl(c)) {
+                       *dst++ = L'^';
                        if (c == 0177)
-                               *dst++ = '?';
+                               *dst++ = L'?';
                        else
-                               *dst++ = c + '@';
+                               *dst++ = c + L'@';
                } else {
-                       HAVE(2);
-                       *dst++ = '-'; *dst++ = c;
+                       *dst++ = L'-';
+                       *dst++ = c;
                }
        }
+
+       return dst;
+}
+
+/*
+ * This is do_vis, the central code of vis.
+ * dst:              Pointer to the destination buffer
+ * c:        Character to encode
+ * flags:     Flags word
+ * nextc:     The character following 'c'
+ * extra:     Pointer to the list of extra characters to be
+ *           backslash-protected.
+ */
+static wchar_t *
+do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
+{
+       int iswextra, i, shft;
+       uint64_t bmsk, wmsk;
+
+       iswextra = wcschr(extra, c) != NULL;
+       if (!iswextra && (iswgraph(c) || iswwhite(c) ||
+           ((flags & VIS_SAFE) && iswsafe(c)))) {
+               *dst++ = c;
+               return dst;
+       }
+
+       /* See comment in istrsenvisx() output loop, below. */
+       wmsk = 0;
+       for (i = sizeof(wmsk) - 1; i >= 0; i--) {
+               shft = i * NBBY;
+               bmsk = (uint64_t)0xffLL << shft;
+               wmsk |= bmsk;
+               if ((c & wmsk) || i == 0)
+                       dst = do_mbyte(dst, (wint_t)(
+                           (uint64_t)(c & bmsk) >> shft),
+                           flags, nextc, iswextra);
+       }
+
        return dst;
-out:
-       *dlen = odlen;
-       return NULL;
 }
 
-typedef char *(*visfun_t)(char *, size_t *, int, int, int, const char *);
+typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
 
 /*
  * Return the appropriate encoding function depending on the flags given.
  */
 static visfun_t
-getvisfun(int flag)
+getvisfun(int flags)
 {
-       if (flag & VIS_HTTPSTYLE)
+       if (flags & VIS_HTTPSTYLE)
                return do_hvis;
-       if (flag & VIS_MIMESTYLE)
+       if (flags & VIS_MIMESTYLE)
                return do_mvis;
        return do_svis;
 }
 
 /*
- * isnvis - visually encode characters, also encoding the characters
- *       pointed to by `extra'
+ * Expand list of extra characters to not visually encode.
  */
-static char *
-isnvis(char *dst, size_t *dlen, int c, int flag, int nextc, const char *extra)
+static wchar_t *
+makeextralist(int flags, const char *src)
 {
-       char *nextra = NULL;
-       visfun_t f;
+       wchar_t *dst, *d;
+       size_t len;
 
-       _DIAGASSERT(dst != NULL);
-       _DIAGASSERT(extra != NULL);
-       MAKEEXTRALIST(flag, nextra, extra);
-       if (!nextra) {
-               if (dlen && *dlen == 0) {
-                       errno = ENOSPC;
-                       return NULL;
-               }
-               *dst = '\0';            /* can't create nextra, return "" */
-               return dst;
-       }
-       f = getvisfun(flag);
-       dst = (*f)(dst, dlen, c, flag, nextc, nextra);
-       free(nextra);
-       if (dst == NULL || (dlen && *dlen == 0)) {
-               errno = ENOSPC;
+       len = strlen(src);
+       if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
                return NULL;
-       }
-       *dst = '\0';
-       return dst;
-}
 
-char *
-svis(char *dst, int c, int flag, int nextc, const char *extra)
-{
-       return isnvis(dst, NULL, c, flag, nextc, extra);
-}
+       if (mbstowcs(dst, src, len) == (size_t)-1) {
+               size_t i;
+               for (i = 0; i < len; i++)
+                       dst[i] = (wint_t)(u_char)src[i];
+               d = dst + len;
+       } else
+               d = dst + wcslen(dst);
+
+       if (flags & VIS_GLOB) {
+               *d++ = L'*';
+               *d++ = L'?';
+               *d++ = L'[';
+               *d++ = L'#';
+       }
+
+       if (flags & VIS_SP) *d++ = L' ';
+       if (flags & VIS_TAB) *d++ = L'\t';
+       if (flags & VIS_NL) *d++ = L'\n';
+       if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
+       *d = L'\0';
 
-char *
-snvis(char *dst, size_t dlen, int c, int flag, int nextc, const char *extra)
-{
-       return isnvis(dst, &dlen, c, flag, nextc, extra);
+       return dst;
 }
 
-
 /*
- * strsvis, strsvisx - visually encode characters from src into dst
- *
- *     Extra is a pointer to a \0-terminated list of characters to
- *     be encoded, too. These functions are useful e. g. to
- *     encode strings in such a way so that they are not interpreted
- *     by a shell.
- *
- *     Dst must be 4 times the size of src to account for possible
- *     expansion.  The length of dst, not including the trailing NULL,
- *     is returned.
- *
- *     Strsvisx encodes exactly len bytes from src into dst.
- *     This is useful for encoding a block of data.
+ * istrsenvisx()
+ *     The main internal function.
+ *     All user-visible functions call this one.
  */
 static int
-istrsnvis(char *dst, size_t *dlen, const char *csrc, int flag, const char 
*extra)
+istrsenvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
+    int flags, const char *mbextra, int *cerr_ptr)
 {
-       int c;
-       char *start;
-       char *nextra = NULL;
-       const unsigned char *src = (const unsigned char *)csrc;
+       wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
+       size_t len, olen;
+       uint64_t bmsk, wmsk;
+       wint_t c;
        visfun_t f;
+       int clen = 0, cerr = 0, error = -1, i, shft;
+       ssize_t mbslength, maxolen;
 
-       _DIAGASSERT(dst != NULL);
-       _DIAGASSERT(src != NULL);
-       _DIAGASSERT(extra != NULL);
-       MAKEEXTRALIST(flag, nextra, extra);
-       if (!nextra) {
-               *dst = '\0';            /* can't create nextra, return "" */
-               return 0;
+       _DIAGASSERT(mbdst != NULL);
+       _DIAGASSERT(mbsrc != NULL);
+       _DIAGASSERT(mbextra != NULL);
+
+       /*
+        * Input (mbsrc) is a char string considered to be multibyte
+        * characters.  The input loop will read this string pulling
+        * one character, possibly multiple bytes, from mbsrc and
+        * converting each to wchar_t in src.
+        *
+        * The vis conversion will be done using the wide char
+        * wchar_t string.
+        *
+        * This will then be converted back to a multibyte string to
+        * return to the caller.
+        */
+
+       /* Allocate space for the wide char strings */
+       psrc = pdst = extra = NULL;
+       if (!mblength)
+               mblength = strlen(mbsrc);
+       if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
+               return -1;
+       if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
+               goto out;
+       dst = pdst;
+       src = psrc;
+
+       /* Use caller's multibyte conversion error flag. */
+       if (cerr_ptr)
+               cerr = *cerr_ptr;
+
+       /*
+        * Input loop.
+        * Handle up to mblength characters (not bytes).  We do not
+        * stop at NULs because we may be processing a block of data
+        * that includes NULs.
+        */
+       mbslength = (ssize_t)mblength;
+       /*
+        * When inputing a single character, must also read in the
+        * next character for nextc, the look-ahead character.
+        */
+       if (mbslength == 1)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-stable-9@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-stable-9
To unsubscribe, send any mail to "svn-src-stable-9-unsubscr...@freebsd.org"

Reply via email to