CVS commit: src

Taylor R Campbell Thu, 15 Aug 2024 14:19:55 -0700

Module Name:    src
Committed By:   riastradh
Date:           Thu Aug 15 21:19:46 UTC 2024


Modified Files:
        src/distrib/sets/lists/comp: mi
        src/distrib/sets/lists/debug: mi
        src/distrib/sets/lists/tests: mi
        src/include: uchar.h
        src/lib/libc/locale: Makefile.inc
        src/share/man/man3: uchar.3
        src/tests/lib/libc/locale: Makefile
Added Files:
        src/lib/libc/locale: c8rtomb.3 c8rtomb.c mbrtoc8.3 mbrtoc8.c
        src/tests/lib/libc/locale: t_c8rtomb.c t_mbrtoc8.c

Log Message:
libc: New functions c8rtomb(3) and mbrtoc8(3).

New in C23, for converting from UTF-8 to locale-dependent multibyte
sequences (c8rtomb) or vice versa (mbrtoc8), along with the new type
char8_t.

Conditional on either:
- _NETBSD_SOURCE
- _ISOC23_SOURCE
- __STDC_VERSION__ >= 202311L

(Riding the libc minor bump from this morning for the UTF-16/UTF-32
versions from C11.)

PR standards/58601: uchar.h C23 compliance: char8_t, mbrtoc8, c8rtomb


To generate a diff of this commit:
cvs rdiff -u -r1.2469 -r1.2470 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.443 -r1.444 src/distrib/sets/lists/debug/mi
cvs rdiff -u -r1.1331 -r1.1332 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.1 -r1.2 src/include/uchar.h
cvs rdiff -u -r1.66 -r1.67 src/lib/libc/locale/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/lib/libc/locale/c8rtomb.3 \
    src/lib/libc/locale/c8rtomb.c src/lib/libc/locale/mbrtoc8.3 \
    src/lib/libc/locale/mbrtoc8.c
cvs rdiff -u -r1.1 -r1.2 src/share/man/man3/uchar.3
cvs rdiff -u -r1.17 -r1.18 src/tests/lib/libc/locale/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/lib/libc/locale/t_c8rtomb.c \
    src/tests/lib/libc/locale/t_mbrtoc8.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2469 src/distrib/sets/lists/comp/mi:1.2470
--- src/distrib/sets/lists/comp/mi:1.2469	Thu Aug 15 14:16:32 2024
+++ src/distrib/sets/lists/comp/mi	Thu Aug 15 21:19:44 2024
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.2469 2024/08/15 14:16:32 riastradh Exp $
+#	$NetBSD: mi,v 1.2470 2024/08/15 21:19:44 riastradh Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp				comp-sys-root
@@ -6844,6 +6844,7 @@
 ./usr/share/man/cat3/bzero.0			comp-c-catman		.cat
 ./usr/share/man/cat3/c16rtomb.0			comp-c-catman		.cat
 ./usr/share/man/cat3/c32rtomb.0			comp-c-catman		.cat
+./usr/share/man/cat3/c8rtomb.0			comp-c-catman		.cat
 ./usr/share/man/cat3/cabs.0			comp-c-catman		complex,.cat
 ./usr/share/man/cat3/cabsf.0			comp-c-catman		complex,.cat
 ./usr/share/man/cat3/cabsl.0			comp-c-catman		complex,.cat
@@ -9208,6 +9209,7 @@
 ./usr/share/man/cat3/mbrlen.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbrtoc16.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbrtoc32.0			comp-c-catman		.cat
+./usr/share/man/cat3/mbrtoc8.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbrtowc.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbsinit.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbsrtowcs.0		comp-c-catman		.cat
@@ -15409,6 +15411,7 @@
 ./usr/share/man/html3/bzero.html		comp-c-htmlman		html
 ./usr/share/man/html3/c16rtomb.html		comp-c-htmlman		html
 ./usr/share/man/html3/c32rtomb.html		comp-c-htmlman		html
+./usr/share/man/html3/c8rtomb.html		comp-c-htmlman		html
 ./usr/share/man/html3/cabs.html			comp-c-htmlman		complex,html
 ./usr/share/man/html3/cabsf.html		comp-c-htmlman		complex,html
 ./usr/share/man/html3/cabsl.html		comp-c-htmlman		complex,html
@@ -17705,6 +17708,7 @@
 ./usr/share/man/html3/mbrlen.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbrtoc16.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbrtoc32.html		comp-c-htmlman		html
+./usr/share/man/html3/mbrtoc8.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbrtowc.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbsinit.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbsrtowcs.html		comp-c-htmlman		html
@@ -23833,6 +23837,7 @@
 ./usr/share/man/man3/bzero.3			comp-c-man		.man
 ./usr/share/man/man3/c16rtomb.3			comp-c-man		.man
 ./usr/share/man/man3/c32rtomb.3			comp-c-man		.man
+./usr/share/man/man3/c8rtomb.3			comp-c-man		.man
 ./usr/share/man/man3/cabs.3			comp-c-man		complex,.man
 ./usr/share/man/man3/cabsf.3			comp-c-man		complex,.man
 ./usr/share/man/man3/cabsl.3			comp-c-man		complex,.man
@@ -26209,6 +26214,7 @@
 ./usr/share/man/man3/mbrlen.3			comp-c-man		.man
 ./usr/share/man/man3/mbrtoc16.3			comp-c-man		.man
 ./usr/share/man/man3/mbrtoc32.3			comp-c-man		.man
+./usr/share/man/man3/mbrtoc8.3			comp-c-man		.man
 ./usr/share/man/man3/mbrtowc.3			comp-c-man		.man
 ./usr/share/man/man3/mbsinit.3			comp-c-man		.man
 ./usr/share/man/man3/mbsrtowcs.3		comp-c-man		.man

Index: src/distrib/sets/lists/debug/mi
diff -u src/distrib/sets/lists/debug/mi:1.443 src/distrib/sets/lists/debug/mi:1.444
--- src/distrib/sets/lists/debug/mi:1.443	Thu Aug 15 14:16:32 2024
+++ src/distrib/sets/lists/debug/mi	Thu Aug 15 21:19:44 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.443 2024/08/15 14:16:32 riastradh Exp $
+# $NetBSD: mi,v 1.444 2024/08/15 21:19:44 riastradh Exp $
 #
 ./etc/mtree/set.debug                           comp-sys-root
 ./usr/lib					comp-sys-usr		compatdir
@@ -2060,6 +2060,7 @@
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_btowc.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_c16rtomb.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_c32rtomb.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c8rtomb.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype1.debug		tests-obsolete		obsolete,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype2.debug		tests-obsolete		obsolete,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_digittoint.debug	tests-lib-debug		debug,atf,compattestfile
@@ -2067,6 +2068,7 @@
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_io.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc16.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc32.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc8.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtowc.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbsnrtowcs.debug	tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbstowcs.debug		tests-lib-debug		debug,atf,compattestfile

Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1331 src/distrib/sets/lists/tests/mi:1.1332
--- src/distrib/sets/lists/tests/mi:1.1331	Thu Aug 15 14:16:33 2024
+++ src/distrib/sets/lists/tests/mi	Thu Aug 15 21:19:45 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1331 2024/08/15 14:16:33 riastradh Exp $
+# $NetBSD: mi,v 1.1332 2024/08/15 21:19:45 riastradh Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -3075,6 +3075,7 @@
 ./usr/tests/lib/libc/locale/t_btowc			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_c16rtomb			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_c32rtomb			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_c8rtomb			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_ctype1			tests-obsolete		obsolete
 ./usr/tests/lib/libc/locale/t_ctype2			tests-obsolete		obsolete
 ./usr/tests/lib/libc/locale/t_digittoint		tests-lib-tests		compattestfile,atf
@@ -3082,6 +3083,7 @@
 ./usr/tests/lib/libc/locale/t_io			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbrtoc16			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbrtoc32			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc8			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbrtowc			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbsnrtowcs		tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbstowcs			tests-lib-tests		compattestfile,atf

Index: src/include/uchar.h
diff -u src/include/uchar.h:1.1 src/include/uchar.h:1.2
--- src/include/uchar.h:1.1	Thu Aug 15 13:14:44 2024
+++ src/include/uchar.h	Thu Aug 15 21:19:45 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: uchar.h,v 1.1 2024/08/15 13:14:44 riastradh Exp $	*/
+/*	$NetBSD: uchar.h,v 1.2 2024/08/15 21:19:45 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2024 The NetBSD Foundation, Inc.
@@ -28,9 +28,8 @@
 
 /*
  * C11, 7.28: Unicode utilities <uchar.h>
- *
- *	`1. The header <uchar.h> declares types and functions for
- *	    manipulating Unicode characters.'
+ * C17, 7.28: Unicode utilities <uchar.h> (unchanged from C11)
+ * C23, 7.30: Unicode utilities <uchar.h>
  */
 
 #ifndef	_UCHAR_H
@@ -39,7 +38,20 @@
 #include <sys/ansi.h>
 
 /*
- *	`2. The types declared are mbstate_t (described in 7.30.1) and
+ * C23	`2. The macro
+ *
+ *		__STDC_VERSION_UCHAR_H__
+ *
+ *	    is an integer constant expression with a value equivalent
+ *	    to 202311L.'
+ */
+#if defined(_NETBSD_SOURCE) || defined(_ISOC23_SOURCE) || \
+    __STDC_VERSION__ - 0 >= 202311L
+#define	__STDC_VERSION_UCHAR_H__	202311L
+#endif
+
+/*
+ * C11	`2. The types declared are mbstate_t (described in 7.30.1) and
  *	    size_t (described in 7.19);
  *
  *	    	char16_t
@@ -65,6 +77,17 @@ typedef _BSD_SIZE_T_	size_t;
 #undef _BSD_SIZE_T_
 #endif
 
+/*
+ * C23	`char8_t...is an unsigned integer type used for 8-bit
+ *	 characters and is the same type as unsigned char'
+ */
+#if defined(_NETBSD_SOURCE) || defined(_ISOC23_SOURCE) || \
+    __STDC_VERSION__ - 0 >= 202311L
+#if !defined(__cpp_char8_t) || __cpp_char8_t < 201811L
+typedef unsigned char		char8_t;
+#endif
+#endif
+
 #if !defined(__cplusplus) || __cplusplus < 201103L
 typedef __UINT_LEAST16_TYPE__	char16_t;
 typedef __UINT_LEAST32_TYPE__	char32_t;
@@ -72,6 +95,12 @@ typedef __UINT_LEAST32_TYPE__	char32_t;
 
 __BEGIN_DECLS
 
+#if defined(_NETBSD_SOURCE) || defined(_ISOC23_SOURCE) || \
+    __STDC_VERSION__ - 0 >= 202311L
+size_t	mbrtoc8(char8_t *__restrict, const char *__restrict, size_t,
+	    mbstate_t *__restrict);
+size_t	c8rtomb(char *__restrict, char8_t, mbstate_t *__restrict);
+#endif
 size_t	mbrtoc16(char16_t *__restrict, const char *__restrict, size_t,
 	    mbstate_t *__restrict);
 size_t	c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);

Index: src/lib/libc/locale/Makefile.inc
diff -u src/lib/libc/locale/Makefile.inc:1.66 src/lib/libc/locale/Makefile.inc:1.67
--- src/lib/libc/locale/Makefile.inc:1.66	Thu Aug 15 14:16:33 2024
+++ src/lib/libc/locale/Makefile.inc	Thu Aug 15 21:19:45 2024
@@ -1,5 +1,5 @@
 #	from: @(#)Makefile.inc	5.1 (Berkeley) 2/18/91
-#	$NetBSD: Makefile.inc,v 1.66 2024/08/15 14:16:33 riastradh Exp $
+#	$NetBSD: Makefile.inc,v 1.67 2024/08/15 21:19:45 riastradh Exp $
 
 # locale sources
 .PATH: ${ARCHDIR}/locale ${.CURDIR}/locale
@@ -13,8 +13,10 @@ SRCS+=	setlocale.c __mb_cur_max.c \
 
 SRCS+=	c16rtomb.c
 SRCS+=	c32rtomb.c
+SRCS+=	c8rtomb.c
 SRCS+=	mbrtoc16.c
 SRCS+=	mbrtoc32.c
+SRCS+=	mbrtoc8.c
 CPPFLAGS.c32rtomb.c+=		-I${LIBCDIR}/citrus
 CPPFLAGS.mbrtoc32.c+=		-I${LIBCDIR}/citrus
 
@@ -38,8 +40,10 @@ MAN+=	btowc.3 mbrtowc.3 mbsrtowcs.3 \
 
 MAN+=	c16rtomb.3
 MAN+=	c32rtomb.3
+MAN+=	c8rtomb.3
 MAN+=	mbrtoc16.3
 MAN+=	mbrtoc32.3
+MAN+=	mbrtoc8.3
 
 MAN+=	iswalnum.3 wctype.3 iswctype.3 \
 	towlower.3 wctrans.3 towctrans.3 \

Index: src/share/man/man3/uchar.3
diff -u src/share/man/man3/uchar.3:1.1 src/share/man/man3/uchar.3:1.2
--- src/share/man/man3/uchar.3:1.1	Thu Aug 15 14:16:34 2024
+++ src/share/man/man3/uchar.3	Thu Aug 15 21:19:45 2024
@@ -1,4 +1,4 @@
-.\"	$NetBSD: uchar.3,v 1.1 2024/08/15 14:16:34 riastradh Exp $
+.\"	$NetBSD: uchar.3,v 1.2 2024/08/15 21:19:45 riastradh Exp $
 .\"
 .\" Copyright (c) 2024 The NetBSD Foundation, Inc.
 .\" All rights reserved.
@@ -24,7 +24,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd August 14, 2024
+.Dd August 15, 2024
 .Dt UCHAR 3
 .Os
 .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@@ -43,6 +43,12 @@ units.
 .\""""""""""""""""""""""""""""""""""""""
 .Ss Types
 .Bl -tag -width ".Vt char32_t"
+.It Vt char8_t
+(C23)
+Unsigned integer type for UTF-8 code units.
+.Pp
+Same type as
+.Vt unsigned char .
 .It Vt char16_t
 Unsigned integer type for UTF-16 code units.
 .Pp
@@ -86,17 +92,21 @@ and
 The
 .In uchar.h
 header file declares the functions
+.Xr mbrtoc8 3 ,
+.Xr c8rtomb 3 ,
 .Xr mbrtoc16 3 ,
 .Xr c16rtomb 3 ,
 .Xr mbrtoc32 3 ,
 and
 .Xr c32rtomb 3
-for conversion between multibyte sequences and UTF-16/UTF-32 code
+for conversion between multibyte sequences and UTF-8/UTF-16/UTF-32 code
 units.
 .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 .Sh SEE ALSO
+.Xr c8rtomb 3 ,
 .Xr c16rtomb 3 ,
 .Xr c32rtomb 3 ,
+.Xr mbrtoc8 3 ,
 .Xr mbrtoc16 3 ,
 .Xr mbrtoc32 3
 .Rs
@@ -115,12 +125,22 @@ units.
 .%I Internet Engineering Task Force
 .%U https://datatracker.ietf.org/doc/html/rfc2781
 .Re
+.Rs
+.%A F. Yergeau
+.%T UTF-8, a transformation format of ISO 10646
+.%R RFC 3629
+.%D November 2003
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc3629
+.Re
 .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
 .Sh STANDARDS
 The
 .In uchar.h
 header file conforms to
 .St -isoC-2011
+.\" .St -isoC-2023
+.\" .\" XXX PR misc/58600: man pages lack C17, C23, C++98, C++03, C++11, C++17, C++20, C++23 citation syntax
 and
 .St -p1003.1-2024 .
 .\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""

Index: src/tests/lib/libc/locale/Makefile
diff -u src/tests/lib/libc/locale/Makefile:1.17 src/tests/lib/libc/locale/Makefile:1.18
--- src/tests/lib/libc/locale/Makefile:1.17	Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/Makefile	Thu Aug 15 21:19:45 2024
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.17 2024/08/15 14:16:34 riastradh Exp $
+# $NetBSD: Makefile,v 1.18 2024/08/15 21:19:45 riastradh Exp $
 
 .include <bsd.own.mk>
 
@@ -7,11 +7,13 @@ TESTSDIR=	${TESTSBASE}/lib/libc/locale
 TESTS_C+=	t_btowc
 TESTS_C+=	t_c16rtomb
 TESTS_C+=	t_c32rtomb
+TESTS_C+=	t_c8rtomb
 TESTS_C+=	t_digittoint
 TESTS_C+=	t_ducet
 TESTS_C+=	t_io
 TESTS_C+=	t_mbrtoc16
 TESTS_C+=	t_mbrtoc32
+TESTS_C+=	t_mbrtoc8
 TESTS_C+=	t_mbrtowc
 TESTS_C+=	t_mbsnrtowcs
 TESTS_C+=	t_mbstowcs

Added files:

Index: src/lib/libc/locale/c8rtomb.3
diff -u /dev/null src/lib/libc/locale/c8rtomb.3:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/lib/libc/locale/c8rtomb.3	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,191 @@
+.\"	$NetBSD: c8rtomb.3,v 1.1 2024/08/15 21:19:45 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 15, 2024
+.Dt C8RTOMB 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm c8rtomb
+.Nd Restartable UTF-8 code unit to multibyte conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn c8rtomb "char * restrict s" \
+"char8_t c8" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to encode Unicode input as a multibyte character
+sequence output at
+.Fa s
+in the current locale, writing anywhere between zero and
+.Dv MB_CUR_MAX
+bytes, inclusive, to
+.Fa s ,
+depending on the inputs and conversion state
+.Fa ps .
+.Pp
+The input
+.Fa c8
+is a UTF-8 code unit.
+Successive calls to
+.Nm
+must provide well-formed UTF-8 code unit sequences.
+If
+.Fa c8 ,
+when appended to the sequence of code units passed in previous calls
+with the same state
+.Fa ps ,
+does not form a well-formed UTF-8 code unit sequence, then
+.Nm
+will return
+.Li (size_t)-1
+to denote failure with
+.Xr errno 2
+set to
+.Er EILSEQ .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns the number of bytes written to
+.Fa s
+on success, or sets
+.Xr errno 2
+and returns
+.Li "(size_t)-1"
+on failure.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Convert a UTF-8 code unit sequence to a multibyte string,
+NUL-terminate it, and print it:
+.Bd -literal -offset indent
+char8_t c8[] = { 0xf0, 0x9f, 0x92, 0xa9 };
+char buf[__arraycount(c8)*MB_CUR_MAX + 1], *s = buf;
+size_t i;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+for (i = 0; i < __arraycount(c8); i++) {
+	size_t len;
+
+	len = c8rtomb(s, c8[i], &mbs);
+	if (len == (size_t)-1)
+		err(1, "c8rtomb");
+	assert(len < sizeof(buf) - (s - buf));
+	s += len;
+}
+*s = '\e0';		/* NUL-terminate */
+printf("%s\en", buf);
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The input
+.Fa c8
+does not continue a well-formed UTF-8 code unit sequence.
+.It Bq Er EILSEQ
+The Unicode scalar value requested cannot be encoded as a multibyte
+sequence in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc8 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A F. Yergeau
+.%T UTF-8, a transformation format of ISO 10646
+.%R RFC 3629
+.%D November 2003
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc3629
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.\" .Sh STANDARDS
+.\" The
+.\" .Nm
+.\" function conforms to
+.\" .St -isoC-2023 .
+.\" .\" XXX PR misc/58600: man pages lack C17, C23, C++98, C++03, C++11, C++17, C++20, C++23 citation syntax
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh BUGS
+It is not clear from the standard how
+.Nm
+is supposed to behave when given an incomplete UTF-8 code unit sequence
+followed by a NUL:
+.Bd -literal -offset indent
+c8rtomb(s, 0xf0, ps);
+c8rtomb(s, 0x9f, ps);
+c8rtomb(s, 0x92, ps);
+c8rtomb(s, u8'\e0', ps);
+.Ed
+.Pp
+Currently this fails with
+.Er EILSEQ ,
+which matches other implementations, but this is at odds with language
+in the standard which suggests that passing
+.Li u8'\e0'
+should unconditionally store a null byte and reset
+.Fa ps
+to the initial conversion state:
+.Bd -offset indent
+If
+.Fa c8
+is a null character, a null byte is stored, preceded by any shift
+sequence needed to restore the initial shift state; the resulting state
+described is the initial conversion state.
+.Ed
+.Pp
+However, it is unclear what else this should store besides a null
+byte.
+Should it discard the pending UTF-8 code unit sequence, or convert it
+to something else and store that?
Index: src/lib/libc/locale/c8rtomb.c
diff -u /dev/null src/lib/libc/locale/c8rtomb.c:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/lib/libc/locale/c8rtomb.c	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,213 @@
+/*	$NetBSD: c8rtomb.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c8rtomb(s, c8, ps)
+ *
+ *	Encode the Unicode UTF-8 code unit c8 into the multibyte buffer
+ *	s under the current locale, using multibyte encoding state ps.
+ *
+ *	If c8 is not the last byte of a UTF-8 scalar value sequence, no
+ *	output will be produced, but c8 will be remembered; this must
+ *	be followed by another call passing the following bytes.
+ *
+ *	Return the number of bytes stored on success, or (size_t)-1 on
+ *	error with errno set to EILSEQ.
+ *
+ *	At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-8,
+ *	p. 124.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ *	F. Yergeau, `UTF-8, a transformation format of ISO 10646',
+ *	RFC 3629, Internet Engineering Task Force, November 2003.
+ *	https://datatracker.ietf.org/doc/html/rfc3629
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: c8rtomb.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <uchar.h>
+
+#include "c32rtomb.h"
+
+struct c8rtombstate {		/* private view of the caller's mbstate_t */
+	char32_t	state_c32; /* bits 31-24: UTF-8 decoder state; bits 23-0: partial scalar value */
+	mbstate_t	mbs;	/* conversion state handed on to c32rtomb() */
+};
+__CTASSERT(offsetof(struct c8rtombstate, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct c32rtombstate) <= sizeof(mbstate_t) -
+    offsetof(struct c8rtombstate, mbs));	/* room left for struct c32rtombstate past the mbs offset */
+__CTASSERT(alignof(struct c8rtombstate) <= alignof(mbstate_t));
+
+/*
+ * UTF-8 validation, inspired by Bjoern Hoehrmann's UTF-8 decoder at
+ * <http://bjoern.hoehrmann.de/utf-8/decoder/dfa/>, but reimplemented
+ * from scratch.
+ */
+
+#define	UTF8_ACCEPT	0	/* at a sequence boundary; also the initial state */
+#define	UTF8_REJECT	96	/* input is not well-formed UTF-8 */
+
+typedef uint_fast8_t utf8_class_t;	/* a value read from utf8_classtab */
+typedef uint_fast8_t utf8_state_t;	/* a value read from utf8_statetab */
+
+static uint8_t utf8_classtab[] = {	/* byte value -> character class; 32 bytes per row */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x00-0x1f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x20-0x3f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x40-0x5f */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,	/* 0x60-0x7f */
+    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,	/* 0x80-0x9f */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 0xa0-0xbf */
+    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,	/* 0xc0-0xdf */
+   11,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 7,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,	/* 0xe0-0xff */
+};
+
+static uint8_t utf8_statetab[] = {	/* next state, indexed by state + class; 12 classes per state */
+     0,96,12,36,48,84,72,60,96,96,96,24, 96, 0,96,96,96,96,96,96, 0, 0,96,96,	/* states 0 (ACCEPT), 12 */
+    96,12,96,96,96,96,96,96,96,96,96,96, 96,12,96,96,96,96,96,96,12,12,96,96,	/* states 24, 36 */
+    96,96,96,96,96,96,96,96,12,12,96,96, 96,36,96,96,96,96,96,96,96,36,96,96,	/* states 48, 60 */
+    96,36,96,96,96,96,96,96,36,36,96,96, 96,96,96,96,96,96,96,96,36,96,96,96,	/* states 72, 84 */
+    96,96,96,96,96,96,96,96,96,96,96,96,	/* state 96 (REJECT): stays rejected */
+};
+
+static utf8_state_t
+utf8_decode_step(utf8_state_t state, char8_t c8, char32_t *pc32)	/* feed one byte; returns the next state and updates *pc32 */
+{
+	const utf8_class_t class = utf8_classtab[c8];	/* character class of this byte */
+
+	*pc32 = (state == UTF8_ACCEPT
+	    ? (c8 & (0xff >> class))	/* start of a sequence: mask off the leading marker bits */
+	    : ((c8 & 0x3f) | (*pc32 << 6)));	/* mid-sequence: append the low 6 bits of c8 */
+
+	return utf8_statetab[state + class];	/* transition on (state, class) */
+}
+
+size_t
+c8rtomb(char *restrict s, char8_t c8, mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;	/* fallback state used when ps == NULL */
+	char buf[MB_LEN_MAX];	/* scratch output used when s == NULL */
+	struct c8rtombstate *S;
+	utf8_state_t state;
+	char32_t c32;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 *  internal mbstate_t object instead, which is initialized at
+	 *  program startup to the initial conversion state; the
+	 *  functions are not required to avoid data races with other
+	 *  calls to the same function in this case.  The
+	 *  implementation behaves as if no library function calls
+	 *  these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the c8rtomb function is equivalent
+	 *  to the call
+	 *
+	 *	c8rtomb(buf, u8'\0', ps)
+	 *
+	 *  where buf is an internal buffer.'
+	 */
+	if (s == NULL) {
+		s = buf;
+		c8 = 0;		/* XXX u8'\0' */
+	}
+
+	/*
+	 * Open the private UTF-8 decoding state.
+	 */
+	S = (struct c8rtombstate *)ps;
+
+#if 0
+	/*
+	 * `If c8 is a null character, a null byte is stored, preceded
+	 *  by any shift sequence needed to restore the initial shift
+	 *  state; the resulting state described is the initial
+	 *  conversion state.'
+	 *
+	 * XXX But what else gets stored?  Do we just discard any
+	 * pending sequence, or do we convert it to something else, or
+	 * what?
+	 */
+	if (c8 == u8'\0') {
+		memset(S->buf, 0, sizeof(S->buf));	/* NB: struct c8rtombstate has no buf or n member */
+		S->n = 0;
+	}
+#endif
+
+	/*
+	 * Get the current state and buffer.
+	 */
+	__CTASSERT(UTF8_ACCEPT == 0); /* initial conversion state */
+	state = __SHIFTOUT(S->state_c32, __BITS(31,24));
+	c32 = __SHIFTOUT(S->state_c32, __BITS(23,0));
+
+	/*
+	 * Feed the byte into the state machine to update the state.
+	 */
+	state = utf8_decode_step(state, c8, &c32);
+	switch (state) {
+	case UTF8_REJECT:
+		/*
+		 * Invalid UTF-8.  Fail with EILSEQ.
+		 */
+		errno = EILSEQ;
+		return (size_t)-1;	/* S->state_c32 is left as it was before this call */
+	default:
+		/*
+		 * Valid UTF-8 so far but incomplete.  Update state and
+		 * output nothing.
+		 */
+		S->state_c32 = __SHIFTIN(state, __BITS(31,24)) |
+		    __SHIFTIN(c32, __BITS(23,0));
+		return 0;
+	case UTF8_ACCEPT:
+		/*
+		 * We have a scalar value.  Clear the state and output
+		 * the scalar value.
+		 */
+		__CTASSERT(UTF8_ACCEPT == 0);
+		S->state_c32 = 0;
+		return c32rtomb(s, c32, &S->mbs);
+	}
+}
Index: src/lib/libc/locale/mbrtoc8.3
diff -u /dev/null src/lib/libc/locale/mbrtoc8.3:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/lib/libc/locale/mbrtoc8.3	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,307 @@
+.\"	$NetBSD: mbrtoc8.3,v 1.1 2024/08/15 21:19:45 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 15, 2024
+.Dt MBRTOC8 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm mbrtoc8
+.Nd Restartable multibyte to UTF-8 code unit conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn mbrtoc8 "char8_t * restrict pc8" \
+"const char * restrict s" \
+"size_t n" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to decode a multibyte character sequence at
+.Fa s
+of up to
+.Fa n
+bytes in the current locale, and yield the content as UTF-8 code
+units via the output parameter
+.Fa pc8 .
+.Fa pc8
+may be null, in which case no output is stored.
+.Bl -bullet
+.It
+If the multibyte sequence at
+.Fa s
+is invalid or an error occurs in decoding,
+.Nm
+returns
+.Li (size_t)-1
+and sets
+.Xr errno 2
+to indicate the error.
+.It
+If the multibyte sequence at
+.Fa s
+is still incomplete after
+.Fa n
+bytes, including any previously processed input saved in
+.Fa ps ,
+.Nm
+saves its state in
+.Fa ps
+after all the input so far and returns
+.Li (size_t)-2 .
+.It
+If
+.Nm
+finds the null scalar value at
+.Fa s ,
+then it stores zero at
+.Li * Ns Fa pc8
+and returns zero.
+.It
+If
+.Nm
+finds a nonnull scalar value in the US-ASCII range, i.e., a 7-bit
+scalar value, then it stores the scalar value at
+.Li * Ns Fa pc8 ,
+and returns the number of bytes it read from the input.
+.It
+If
+.Nm
+finds a scalar value outside the US-ASCII range, it:
+.Bl -dash -compact
+.It
+stores the leading byte in the scalar value's UTF-8 encoding at
+.Li * Ns Fa pc8 ;
+.It
+stores conversion state in
+.Fa ps
+to remember the rest of the pending scalar value; and
+.It
+returns the number of bytes it read from the input.
+.El
+.It
+If
+.Nm
+had previously found a scalar value outside the US-ASCII range, then,
+instead of any of the above options, it:
+.Bl -dash -compact
+.It
+stores the next byte in the scalar value's UTF-8 encoding at
+.Li * Ns Fa pc8 ;
+.It
+updates the conversion state in
+.Fa ps
+to consume this byte; and
+.It
+returns
+.Li (size_t)-3
+to indicate that no bytes were consumed but a code unit was yielded
+nevertheless.
+.El
+.El
+.Pp
+If
+.Fa s
+is a null pointer, the
+.Nm
+call is equivalent to:
+.Bd -ragged -offset indent
+.Fo mbrtoc8
+.Li NULL ,
+.Li \*q\*q ,
+.Li 1 ,
+.Fa ps
+.Fc
+.Ed
+.Pp
+This always returns zero, and has the effect of resetting
+.Fa ps
+to the initial conversion state, without writing to
+.Fa pc8 ,
+even if it is nonnull.
+.Pp
+If
+.Fa ps
+is a null pointer,
+.Nm
+uses an internal
+.Vt mbstate_t
+object with static storage duration, distinct from all other
+.Vt mbstate_t
+objects (including those used by
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr c8rtomb 3 ,
+.Xr c16rtomb 3 ,
+and
+.Xr c32rtomb 3 ) ,
+which is initialized at program startup to the initial conversion
+state.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns:
+.Bl -tag -width ".Li (size_t)-3" -offset indent
+.It Li 0
+[null]
+if within the next
+.Fa n
+bytes at
+.Fa s
+the first multibyte character is null.
+.It Fa i
+[code unit]
+where
+.Li 0
+\*(Le
+.Fa i
+\*(Le
+.Fa n ,
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded an incomplete UTF-8 code unit sequence, and within the first
+.Fa i
+bytes at
+.Fa s
+a Unicode scalar value was decoded.
+.It Li (size_t)-3
+[continuation]
+if the previous call to
+.Nm
+with
+.Fa ps
+had yielded an incomplete UTF-8 code unit sequence for a Unicode scalar value
+outside the US-ASCII range; no additional input is consumed in this
+case.
+.It Li (size_t)-2
+[incomplete]
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded an incomplete UTF-8 code unit sequence, and within the first
+.Fa n
+bytes at
+.Fa s ,
+including any previously buffered input, no complete Unicode scalar
+value could be decoded.
+.It Li (size_t)-1
+[error]
+if any encoding error was detected;
+.Xr errno 2
+is set to reflect the error.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Print the UTF-8 code units of a multibyte string in hexadecimal text:
+.Bd -literal -offset indent
+char *s = ...;
+size_t n = ...;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+while (n) {
+	char8_t c8;
+	size_t len;
+
+	len = mbrtoc8(&c8, s, n, &mbs);
+	switch (len) {
+	case 0:		/* null terminator */
+		assert(c8 == '\e0');
+		goto out;
+	default:	/* consumed input and yielded a byte c8 */
+		printf("0x%02hhx\en", c8);
+		break;
+	case (size_t)-3: /* yielded a pending byte c8 */
+		printf("continue 0x%02hhx\en", c8);
+		continue;	/* no input consumed: leave s and n alone */
+	case (size_t)-2: /* incomplete */
+		printf("incomplete\en");
+		goto readmore;
+	case (size_t)-1: /* error */
+		printf("error: %d\en", errno);
+		goto out;
+	}
+	s += len;
+	n -= len;
+}
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The multibyte sequence cannot be decoded as a Unicode scalar value.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c8rtomb 3 ,
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A F. Yergeau
+.%T UTF-8, a transformation format of ISO 10646
+.%R RFC 3629
+.%D November 2003
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc3629
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.\" .Sh STANDARDS
+.\" The
+.\" .Nm
+.\" function conforms to
+.\" .St -isoC-2023 .
+.\" .\" XXX PR misc/58600: man pages lack C17, C23, C++98, C++03, C++11, C++17, C++20, C++23 citation syntax
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/mbrtoc8.c
diff -u /dev/null src/lib/libc/locale/mbrtoc8.c:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/lib/libc/locale/mbrtoc8.c	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,210 @@
+/*	$NetBSD: mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc8(&c8, s, n, ps)
+ *
+ *	Decode a Unicode scalar value from up to n bytes out of the
+ *	multibyte string s, using multibyte encoding state ps, and
+ *	store the next code unit in the UTF-8 representation of that
+ *	scalar value at c8.
+ *
+ *	If the UTF-8 representation of that scalar value is multiple
+ *	bytes long, mbrtoc8 will yield the leading byte in one call that
+ *	consumes input, and will yield the trailing bytes in subsequent
+ *	calls without consuming any input and returning (size_t)-3
+ *	instead.
+ *
+ *	Return the number of bytes consumed on success, or:
+ *
+ *	- 0 if the code unit is NUL, or
+ *	- (size_t)-3 if a trailing byte was returned without consuming
+ *	  any additional input, or
+ *	- (size_t)-2 if the input is incomplete, or
+ *	- (size_t)-1 on error with errno set to EILSEQ.
+ *
+ *	In the case of incomplete input, the decoding state so far
+ *	after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ *	subsequent calls to mbrtoc8 will pick up n bytes later into
+ *	the input stream.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ *	p. 124.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ *	F. Yergeau, `UTF-8, a transformation format of ISO 10646',
+ *	RFC 3629, Internet Engineering Task Force, November 2003.
+ *	https://datatracker.ietf.org/doc/html/rfc3629
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <stdalign.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "mbrtoc32.h"
+
+struct mbrtoc8state {
+	char8_t		nleft;
+	char8_t		buf[3];
+	mbstate_t	mbs;
+};
+__CTASSERT(offsetof(struct mbrtoc8state, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t) -
+    offsetof(struct mbrtoc8state, mbs));
+__CTASSERT(alignof(struct mbrtoc8state) <= alignof(mbstate_t));
+
+size_t
+mbrtoc8(char8_t *restrict pc8, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;
+	struct mbrtoc8state *S;
+	char32_t c32;
+	size_t len;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 *  internal mbstate_t object instead, which is initialized at
+	 *  program startup to the initial conversion state; the
+	 *  functions are not required to avoid data races with other
+	 *  calls to the same function in this case.  The
+	 *  implementation behaves as if no library function calls
+	 *  these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the mbrtoc8 function is equivalent
+	 *  to the call:
+	 *
+	 *	mbrtoc8(NULL, "", 1, ps)
+	 *
+	 *  In this case, the values of the parameters pc8 and n are
+	 *  ignored.'
+	 */
+	if (s == NULL) {
+		pc8 = NULL;
+		s = "";
+		n = 1;
+	}
+
+	/*
+	 * Get the private conversion state.
+	 */
+	S = (struct mbrtoc8state *)ps;
+
+	/*
+	 * If there are pending trailing bytes, yield them and return
+	 * (size_t)-3 to indicate that no bytes of input were consumed.
+	 */
+	if (S->nleft) {
+		if (pc8)
+			*pc8 = S->buf[sizeof(S->buf) - S->nleft];
+		S->buf[sizeof(S->buf) - S->nleft] = 0; /* paranoia */
+		S->nleft--;
+		return (size_t)-3;
+	}
+
+	/*
+	 * Consume the next scalar value.  If no full scalar value can
+	 * be obtained, stop here.
+	 */
+	len = mbrtoc32(&c32, s, n, &S->mbs);
+	switch (len) {
+	case 0:			/* NUL */
+		if (pc8)
+			*pc8 = 0;
+		return 0;
+	case (size_t)-2:	/* still incomplete after n bytes */
+	case (size_t)-1:	/* error */
+		return len;
+	default:		/* consumed len bytes of input */
+		break;
+	}
+
+	/*
+	 * We consumed a scalar value from the input.
+	 *
+	 * Encode it as UTF-8, yield the leading byte, and buffer the
+	 * trailing bytes to yield later.
+	 *
+	 * Table 3-6: UTF-8 Bit Distribution
+	 * Table 3-7: Well-Formed UTF-8 Byte Sequences
+	 */
+	switch (c32) {
+	case 0x00 ... 0x7f:
+		if (pc8)
+			*pc8 = c32;
+		_DIAGASSERT(S->nleft == 0);
+		break;
+	case 0x0080 ... 0x07ff:
+		if (pc8)
+			*pc8 = 0xc0 | __SHIFTOUT(c32, __BITS(10,6));
+		S->buf[2] = 0x80 | __SHIFTOUT(c32, __BITS(5,0));
+		S->nleft = 1;
+		break;
+	case 0x0800 ... 0xffff:
+		if (pc8)
+			*pc8 = 0xe0 | __SHIFTOUT(c32, __BITS(15,12));
+		S->buf[1] = 0x80 | __SHIFTOUT(c32, __BITS(11,6));
+		S->buf[2] = 0x80 | __SHIFTOUT(c32, __BITS(5,0));
+		S->nleft = 2;
+		break;
+	case 0x10000 ... 0x10ffff:
+		if (pc8)
+			*pc8 = 0xf0 | __SHIFTOUT(c32, __BITS(20,18));
+		S->buf[0] = 0x80 | __SHIFTOUT(c32, __BITS(17,12));
+		S->buf[1] = 0x80 | __SHIFTOUT(c32, __BITS(11,6));
+		S->buf[2] = 0x80 | __SHIFTOUT(c32, __BITS(5,0));
+		S->nleft = 3;
+		break;
+	default:
+		errno = EILSEQ;
+		return (size_t)-1;
+	}
+
+	/*
+	 * Return the number of bytes consumed from the input.
+	 */
+	return len;
+}

Index: src/tests/lib/libc/locale/t_c8rtomb.c
diff -u /dev/null src/tests/lib/libc/locale/t_c8rtomb.c:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/tests/lib/libc/locale/t_c8rtomb.c	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,205 @@
+/*	$NetBSD: t_c8rtomb.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <e...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for c8rtomb() as specified by C23.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_c8rtomb.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $");
+
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+	char *lc_ctype_set;
+
+	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+	if (lc_ctype_set == NULL)
+		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+		    locale_name, errno);
+
+	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char buf[MB_LEN_MAX + 1];
+
+ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test);
+ATF_TC_BODY(c8rtomb_c_locale_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("C");
+
+	/*
+	 * If the buffer argument is NULL, c8 is implicitly 0 and
+	 * c8rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
+
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
+
+	/* Latin letter A. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* Unicode character 'Pile of poo'. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test);
+ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* Unicode character 'Euro sign'. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_15_test);
+ATF_TC_BODY(c8rtomb_iso_8859_15_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* Unicode character 'Euro sign'. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xe2, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x82, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xac, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+}
+
+ATF_TC_WITHOUT_HEAD(c8rtomb_utf_8_test);
+ATF_TC_BODY(c8rtomb_utf_8_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.UTF-8");
+
+	/* Unicode character 'Pile of poo'. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xa9, &s)), 4, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
+		(unsigned char)buf[1] == 0x9f &&
+		(unsigned char)buf[2] == 0x92 &&
+		(unsigned char)buf[3] == 0xa9 &&
+		(unsigned char)buf[4] == 0xcc),
+	    "buf=[%02x %02x %02x %02x %02x]",
+	    buf[0], buf[1], buf[2], buf[3], buf[4]);
+
+	/* Invalid code; 'Pile of poo' without the last byte. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0xf0, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x92, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 'A', &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+
+	/* Invalid code; 'Pile of poo' without the first byte. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0x9f, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test);
+	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test);
+	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test);
+	ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);
+
+	return (atf_no_error());
+}
Index: src/tests/lib/libc/locale/t_mbrtoc8.c
diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc8.c:1.1
--- /dev/null	Thu Aug 15 21:19:46 2024
+++ src/tests/lib/libc/locale/t_mbrtoc8.c	Thu Aug 15 21:19:45 2024
@@ -0,0 +1,268 @@
+/*	$NetBSD: t_mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <e...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for mbrtoc8() as specified by C23.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_mbrtoc8.c,v 1.1 2024/08/15 21:19:45 riastradh Exp $");
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+	char *lc_ctype_set;
+
+	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+	if (lc_ctype_set == NULL)
+		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+		    locale_name, errno);
+
+	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char8_t c8;
+
+ATF_TC_WITHOUT_HEAD(mbrtoc8_c_locale_test);
+ATF_TC_BODY(mbrtoc8_c_locale_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("C");
+
+	/* Null wide character, internal state. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'A');
+
+	/* Latin letter A. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'A');
+
+	/* Incomplete character sequence. */
+	c8 = 'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'z');
+
+	/* Check that mbrtoc8() doesn't access the buffer when n == 0. */
+	c8 = 'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'z');
+
+	/* Check that mbrtoc8() doesn't read ahead too aggressively. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "AB", 2, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'A');
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "C", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'C', "c8=0x%"PRIx8" 'C'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'C');
+
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc8_iso_8859_1_test);
+ATF_TC_BODY(mbrtoc8_iso_8859_1_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* Currency sign. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xa4", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xc2, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xa4, "c8=0x%"PRIx8, (uint8_t)c8);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc8_iso_8859_15_test);
+ATF_TC_BODY(mbrtoc8_iso_8859_15_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* Euro sign. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xa4", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xe2, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x82, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xac, "c8=0x%"PRIx8, (uint8_t)c8);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc8_utf_8_test);
+ATF_TC_BODY(mbrtoc8_utf_8_test, tc)
+{
+	size_t n;
+
+	require_lc_ctype("en_US.UTF-8");
+
+	/* Null wide character, internal state. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 1, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'A');
+
+	/* Latin letter A. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "A", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'A', "c8=0x%"PRIx8" 'A'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'A');
+
+	/* Incomplete character sequence (zero length). */
+	c8 = 'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 'z', "c8=0x%"PRIx8" 'z'=0x%"PRIx8,
+	    (uint8_t)c8, (uint8_t)'z');
+
+	/* Incomplete character sequence (truncated double-byte). */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3", 1, &s)), (size_t)-2,
+	    "n=%zu", n);
+
+	/* Same as above, but complete. */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3\x84", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x84, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Test restarting behaviour. */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3", 1, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xb7", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xb7, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Four-byte sequence. */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xf0\x9f\x92\xa9", 4, &s)), 4,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xf0, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x9f, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x92, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xa9, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Letter e with acute, precomposed. */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xc3\xa9", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xc3, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xa9, "c8=0x%"PRIx8, (uint8_t)c8);
+
+	/* Letter e with acute, combined. */
+	memset(&s, 0, sizeof(s));
+	c8 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\x65\xcc\x81", 3, &s)), 1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x65, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "\xcc\x81", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0xcc, "c8=0x%"PRIx8, (uint8_t)c8);
+	ATF_CHECK_EQ_MSG((n = mbrtoc8(&c8, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c8, 0x81, "c8=0x%"PRIx8, (uint8_t)c8);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, mbrtoc8_c_locale_test);
+	ATF_TP_ADD_TC(tp, mbrtoc8_iso_8859_1_test);
+	ATF_TP_ADD_TC(tp, mbrtoc8_iso_8859_15_test);
+	ATF_TP_ADD_TC(tp, mbrtoc8_utf_8_test);
+
+	return (atf_no_error());
+}

CVS commit: src

Reply via email to