Module Name:    src
Committed By:   riastradh
Date:           Thu Aug 15 14:16:34 UTC 2024

Modified Files:
        src/distrib/sets/lists/base: shl.mi
        src/distrib/sets/lists/comp: mi
        src/distrib/sets/lists/debug: mi
        src/distrib/sets/lists/tests: mi
        src/lib/libc: shlib_version
        src/lib/libc/locale: Makefile.inc
        src/share/man/man3: Makefile
        src/tests/lib/libc/locale: Makefile
Added Files:
        src/lib/libc/locale: c16rtomb.3 c16rtomb.c c32rtomb.3 c32rtomb.c
            c32rtomb.h mbrtoc16.3 mbrtoc16.c mbrtoc32.3 mbrtoc32.c mbrtoc32.h
        src/share/man/man3: uchar.3
        src/tests/lib/libc/locale: t_c16rtomb.c t_c32rtomb.c t_mbrtoc16.c
            t_mbrtoc32.c

Log Message:
libc: New C11 functions mbrtoc16, mbrtoc32, c16rtomb, c32rtomb.

The mbrtoc16/32 functions read multibyte strings according to the
current locale into UTF-16/32 code unit sequences; the c16/32rtomb
functions write UTF-16/32 code unit sequences into multibyte strings
according to the current locale.  The `r' means restartable: they
work incrementally and pick up where they left off.

NOTE: This bumps the libc minor version, since it adds new symbols.

PR lib/52374: <uchar.h> missing


To generate a diff of this commit:
cvs rdiff -u -r1.987 -r1.988 src/distrib/sets/lists/base/shl.mi
cvs rdiff -u -r1.2468 -r1.2469 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.442 -r1.443 src/distrib/sets/lists/debug/mi
cvs rdiff -u -r1.1330 -r1.1331 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.296 -r1.297 src/lib/libc/shlib_version
cvs rdiff -u -r1.65 -r1.66 src/lib/libc/locale/Makefile.inc
cvs rdiff -u -r0 -r1.1 src/lib/libc/locale/c16rtomb.3 \
    src/lib/libc/locale/c16rtomb.c src/lib/libc/locale/c32rtomb.3 \
    src/lib/libc/locale/c32rtomb.c src/lib/libc/locale/c32rtomb.h \
    src/lib/libc/locale/mbrtoc16.3 src/lib/libc/locale/mbrtoc16.c \
    src/lib/libc/locale/mbrtoc32.3 src/lib/libc/locale/mbrtoc32.c \
    src/lib/libc/locale/mbrtoc32.h
cvs rdiff -u -r1.92 -r1.93 src/share/man/man3/Makefile
cvs rdiff -u -r0 -r1.1 src/share/man/man3/uchar.3
cvs rdiff -u -r1.16 -r1.17 src/tests/lib/libc/locale/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/lib/libc/locale/t_c16rtomb.c \
    src/tests/lib/libc/locale/t_c32rtomb.c \
    src/tests/lib/libc/locale/t_mbrtoc16.c \
    src/tests/lib/libc/locale/t_mbrtoc32.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/base/shl.mi
diff -u src/distrib/sets/lists/base/shl.mi:1.987 src/distrib/sets/lists/base/shl.mi:1.988
--- src/distrib/sets/lists/base/shl.mi:1.987	Fri Aug  2 17:25:38 2024
+++ src/distrib/sets/lists/base/shl.mi	Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-# $NetBSD: shl.mi,v 1.987 2024/08/02 17:25:38 christos Exp $
+# $NetBSD: shl.mi,v 1.988 2024/08/15 14:16:32 riastradh Exp $
 #
 # Note:	Don't delete entries from here - mark them as "obsolete" instead,
 #	unless otherwise stated below.
@@ -22,7 +22,7 @@
 ./lib/libblocklist.so.0.1			base-sys-shlib		dynamicroot
 ./lib/libc.so					base-sys-shlib		dynamicroot
 ./lib/libc.so.12				base-sys-shlib		dynamicroot
-./lib/libc.so.12.221				base-sys-shlib		dynamicroot
+./lib/libc.so.12.222				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so.1				base-sys-shlib		dynamicroot
 ./lib/libcrypt.so.1.0				base-sys-shlib		dynamicroot
@@ -257,7 +257,7 @@
 ./usr/lib/libc++.so.1.0				base-sys-shlib		compatfile,libcxx
 ./usr/lib/libc.so				base-sys-shlib		compatfile
 ./usr/lib/libc.so.12				base-sys-shlib		compatfile
-./usr/lib/libc.so.12.221			base-sys-shlib		compatfile
+./usr/lib/libc.so.12.222			base-sys-shlib		compatfile
 ./usr/lib/libcbor.so				base-sys-shlib		compatfile
 ./usr/lib/libcbor.so.0				base-sys-shlib		compatfile
 ./usr/lib/libcbor.so.0.5			base-sys-shlib		compatfile

Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2468 src/distrib/sets/lists/comp/mi:1.2469
--- src/distrib/sets/lists/comp/mi:1.2468	Thu Aug 15 13:14:43 2024
+++ src/distrib/sets/lists/comp/mi	Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.2468 2024/08/15 13:14:43 riastradh Exp $
+#	$NetBSD: mi,v 1.2469 2024/08/15 14:16:32 riastradh Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp				comp-sys-root
@@ -6842,6 +6842,8 @@
 ./usr/share/man/cat3/bufferevent_write_buffer.0 comp-obsolete		obsolete
 ./usr/share/man/cat3/byteorder.0		comp-c-catman		.cat
 ./usr/share/man/cat3/bzero.0			comp-c-catman		.cat
+./usr/share/man/cat3/c16rtomb.0			comp-c-catman		.cat
+./usr/share/man/cat3/c32rtomb.0			comp-c-catman		.cat
 ./usr/share/man/cat3/cabs.0			comp-c-catman		complex,.cat
 ./usr/share/man/cat3/cabsf.0			comp-c-catman		complex,.cat
 ./usr/share/man/cat3/cabsl.0			comp-c-catman		complex,.cat
@@ -9204,6 +9206,8 @@
 ./usr/share/man/cat3/math.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mblen.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbrlen.0			comp-c-catman		.cat
+./usr/share/man/cat3/mbrtoc16.0			comp-c-catman		.cat
+./usr/share/man/cat3/mbrtoc32.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbrtowc.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbsinit.0			comp-c-catman		.cat
 ./usr/share/man/cat3/mbsrtowcs.0		comp-c-catman		.cat
@@ -11148,6 +11152,7 @@
 ./usr/share/man/cat3/tzset.0			comp-c-catman		.cat
 ./usr/share/man/cat3/tzsetwall.0		comp-c-catman		.cat
 ./usr/share/man/cat3/ualarm.0			comp-c-catman		.cat
+./usr/share/man/cat3/uchar.0			comp-c-catman		.cat
 ./usr/share/man/cat3/uid_from_user.0		comp-c-catman		.cat
 ./usr/share/man/cat3/ukfs.0			comp-c-catman		.cat,rump
 ./usr/share/man/cat3/ulimit.0			comp-c-catman		.cat
@@ -15402,6 +15407,8 @@
 ./usr/share/man/html3/bufferevent_write_buffer.html	comp-obsolete		obsolete
 ./usr/share/man/html3/byteorder.html		comp-c-htmlman		html
 ./usr/share/man/html3/bzero.html		comp-c-htmlman		html
+./usr/share/man/html3/c16rtomb.html		comp-c-htmlman		html
+./usr/share/man/html3/c32rtomb.html		comp-c-htmlman		html
 ./usr/share/man/html3/cabs.html			comp-c-htmlman		complex,html
 ./usr/share/man/html3/cabsf.html		comp-c-htmlman		complex,html
 ./usr/share/man/html3/cabsl.html		comp-c-htmlman		complex,html
@@ -17696,6 +17703,8 @@
 ./usr/share/man/html3/math.html			comp-c-htmlman		html
 ./usr/share/man/html3/mblen.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbrlen.html		comp-c-htmlman		html
+./usr/share/man/html3/mbrtoc16.html		comp-c-htmlman		html
+./usr/share/man/html3/mbrtoc32.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbrtowc.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbsinit.html		comp-c-htmlman		html
 ./usr/share/man/html3/mbsrtowcs.html		comp-c-htmlman		html
@@ -19621,6 +19630,7 @@
 ./usr/share/man/html3/tzset.html		comp-c-htmlman		html
 ./usr/share/man/html3/tzsetwall.html		comp-c-htmlman		html
 ./usr/share/man/html3/ualarm.html		comp-c-htmlman		html
+./usr/share/man/html3/uchar.html		comp-c-htmlman		html
 ./usr/share/man/html3/uid_from_user.html	comp-c-htmlman		html
 ./usr/share/man/html3/ukfs.html			comp-c-htmlman		html,rump
 ./usr/share/man/html3/ulimit.html		comp-c-htmlman		html
@@ -23821,6 +23831,8 @@
 ./usr/share/man/man3/bufferevent_write_buffer.3 comp-obsolete		obsolete
 ./usr/share/man/man3/byteorder.3		comp-c-man		.man
 ./usr/share/man/man3/bzero.3			comp-c-man		.man
+./usr/share/man/man3/c16rtomb.3			comp-c-man		.man
+./usr/share/man/man3/c32rtomb.3			comp-c-man		.man
 ./usr/share/man/man3/cabs.3			comp-c-man		complex,.man
 ./usr/share/man/man3/cabsf.3			comp-c-man		complex,.man
 ./usr/share/man/man3/cabsl.3			comp-c-man		complex,.man
@@ -26195,6 +26207,8 @@
 ./usr/share/man/man3/math.3			comp-c-man		.man
 ./usr/share/man/man3/mblen.3			comp-c-man		.man
 ./usr/share/man/man3/mbrlen.3			comp-c-man		.man
+./usr/share/man/man3/mbrtoc16.3			comp-c-man		.man
+./usr/share/man/man3/mbrtoc32.3			comp-c-man		.man
 ./usr/share/man/man3/mbrtowc.3			comp-c-man		.man
 ./usr/share/man/man3/mbsinit.3			comp-c-man		.man
 ./usr/share/man/man3/mbsrtowcs.3		comp-c-man		.man
@@ -28158,6 +28172,7 @@
 ./usr/share/man/man3/tzset.3			comp-c-man		.man
 ./usr/share/man/man3/tzsetwall.3		comp-c-man		.man
 ./usr/share/man/man3/ualarm.3			comp-c-man		.man
+./usr/share/man/man3/uchar.3			comp-c-man		.man
 ./usr/share/man/man3/uid_from_user.3		comp-c-man		.man
 ./usr/share/man/man3/ukfs.3			comp-c-man		.man,rump
 ./usr/share/man/man3/ulimit.3			comp-c-man		.man

Index: src/distrib/sets/lists/debug/mi
diff -u src/distrib/sets/lists/debug/mi:1.442 src/distrib/sets/lists/debug/mi:1.443
--- src/distrib/sets/lists/debug/mi:1.442	Thu Aug 15 13:14:44 2024
+++ src/distrib/sets/lists/debug/mi	Thu Aug 15 14:16:32 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.442 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: mi,v 1.443 2024/08/15 14:16:32 riastradh Exp $
 #
 ./etc/mtree/set.debug                           comp-sys-root
 ./usr/lib					comp-sys-usr		compatdir
@@ -2058,11 +2058,15 @@
 ./usr/libdata/debug/usr/tests/lib/libc/inet/t_inet_network.debug	tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/kevent_nullmnt/h_nullmnt.debug	tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_btowc.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c16rtomb.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_c32rtomb.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype1.debug		tests-obsolete		obsolete,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype2.debug		tests-obsolete		obsolete,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_digittoint.debug	tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ducet.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_io.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc16.debug		tests-lib-debug		debug,atf,compattestfile
+./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc32.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtowc.debug		tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbsnrtowcs.debug	tests-lib-debug		debug,atf,compattestfile
 ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbstowcs.debug		tests-lib-debug		debug,atf,compattestfile

Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.1330 src/distrib/sets/lists/tests/mi:1.1331
--- src/distrib/sets/lists/tests/mi:1.1330	Thu Aug 15 13:14:44 2024
+++ src/distrib/sets/lists/tests/mi	Thu Aug 15 14:16:33 2024
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.1330 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: mi,v 1.1331 2024/08/15 14:16:33 riastradh Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -3073,11 +3073,15 @@
 ./usr/tests/lib/libc/locale/Atffile			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/Kyuafile			tests-lib-tests		compattestfile,atf,kyua
 ./usr/tests/lib/libc/locale/t_btowc			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_c16rtomb			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_c32rtomb			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_ctype1			tests-obsolete		obsolete
 ./usr/tests/lib/libc/locale/t_ctype2			tests-obsolete		obsolete
 ./usr/tests/lib/libc/locale/t_digittoint		tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_ducet			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_io			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc16			tests-lib-tests		compattestfile,atf
+./usr/tests/lib/libc/locale/t_mbrtoc32			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbrtowc			tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbsnrtowcs		tests-lib-tests		compattestfile,atf
 ./usr/tests/lib/libc/locale/t_mbstowcs			tests-lib-tests		compattestfile,atf

Index: src/lib/libc/shlib_version
diff -u src/lib/libc/shlib_version:1.296 src/lib/libc/shlib_version:1.297
--- src/lib/libc/shlib_version:1.296	Fri Feb  2 21:52:22 2024
+++ src/lib/libc/shlib_version	Thu Aug 15 14:16:33 2024
@@ -1,4 +1,4 @@
-#	$NetBSD: shlib_version,v 1.296 2024/02/02 21:52:22 andvar Exp $
+#	$NetBSD: shlib_version,v 1.297 2024/08/15 14:16:33 riastradh Exp $
 #	Remember to update distrib/sets/lists/base/shl.* when changing
 #
 # things we wish to do on next major version bump:
@@ -55,4 +55,4 @@
 # - remove tzsetwall(3), upstream has removed it
 # - move *rand48* to libcompat
 major=12
-minor=221
+minor=222

Index: src/lib/libc/locale/Makefile.inc
diff -u src/lib/libc/locale/Makefile.inc:1.65 src/lib/libc/locale/Makefile.inc:1.66
--- src/lib/libc/locale/Makefile.inc:1.65	Mon Feb 15 14:35:04 2021
+++ src/lib/libc/locale/Makefile.inc	Thu Aug 15 14:16:33 2024
@@ -1,5 +1,5 @@
 #	from: @(#)Makefile.inc	5.1 (Berkeley) 2/18/91
-#	$NetBSD: Makefile.inc,v 1.65 2021/02/15 14:35:04 christos Exp $
+#	$NetBSD: Makefile.inc,v 1.66 2024/08/15 14:16:33 riastradh Exp $
 
 # locale sources
 .PATH: ${ARCHDIR}/locale ${.CURDIR}/locale
@@ -11,6 +11,13 @@ SRCS+=	setlocale.c __mb_cur_max.c \
 	wcstol.c wcstoll.c wcstoimax.c wcstoul.c wcstoull.c wcstoumax.c \
 	wcstod.c wcstof.c wcstold.c wcscoll.c wcsxfrm.c wcsftime.c
 
+SRCS+=	c16rtomb.c
+SRCS+=	c32rtomb.c
+SRCS+=	mbrtoc16.c
+SRCS+=	mbrtoc32.c
+CPPFLAGS.c32rtomb.c+=		-I${LIBCDIR}/citrus
+CPPFLAGS.mbrtoc32.c+=		-I${LIBCDIR}/citrus
+
 # citrus multibyte locale support
 # we have quirk for libc.a - see the last part of lib/libc/Makefile
 CPPFLAGS+=	-DWITH_RUNE -I${.CURDIR}
@@ -29,6 +36,11 @@ MAN+=	btowc.3 mbrtowc.3 mbsrtowcs.3 \
 	wctob.3 wcrtomb.3 wcsrtombs.3 \
 	mbrlen.3 mbsinit.3
 
+MAN+=	c16rtomb.3
+MAN+=	c32rtomb.3
+MAN+=	mbrtoc16.3
+MAN+=	mbrtoc32.3
+
 MAN+=	iswalnum.3 wctype.3 iswctype.3 \
 	towlower.3 wctrans.3 towctrans.3 \
 	wcwidth.3

Index: src/share/man/man3/Makefile
diff -u src/share/man/man3/Makefile:1.92 src/share/man/man3/Makefile:1.93
--- src/share/man/man3/Makefile:1.92	Thu Mar  7 22:14:20 2024
+++ src/share/man/man3/Makefile	Thu Aug 15 14:16:34 2024
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile,v 1.92 2024/03/07 22:14:20 christos Exp $
+#	$NetBSD: Makefile,v 1.93 2024/08/15 14:16:34 riastradh Exp $
 #	@(#)Makefile	8.2 (Berkeley) 12/13/93
 
 MAN=	_DIAGASSERT.3 __CONCAT.3 __FPTRCAST.3 __UNCONST.3 __USE.3 CMSG_DATA.3 \
@@ -13,6 +13,7 @@ MAN=	_DIAGASSERT.3 __CONCAT.3 __FPTRCAST
 	makedev.3 offsetof.3 param.3 paths.3 queue.3 rbtree.3 sigevent.3 \
 	stdarg.3 stdbool.3 stddef.3 stdint.3 stdlib.3 sysexits.3 \
 	tgmath.3 timeradd.3 timeval.3 tm.3 tree.3 typeof.3 types.3 \
+	uchar.3 \
 	unistd.3
 
 USETBL=	# used by queue.3

Index: src/tests/lib/libc/locale/Makefile
diff -u src/tests/lib/libc/locale/Makefile:1.16 src/tests/lib/libc/locale/Makefile:1.17
--- src/tests/lib/libc/locale/Makefile:1.16	Thu Aug 15 13:14:44 2024
+++ src/tests/lib/libc/locale/Makefile	Thu Aug 15 14:16:34 2024
@@ -1,13 +1,17 @@
-# $NetBSD: Makefile,v 1.16 2024/08/15 13:14:44 riastradh Exp $
+# $NetBSD: Makefile,v 1.17 2024/08/15 14:16:34 riastradh Exp $
 
 .include <bsd.own.mk>
 
 TESTSDIR=	${TESTSBASE}/lib/libc/locale
 
 TESTS_C+=	t_btowc
+TESTS_C+=	t_c16rtomb
+TESTS_C+=	t_c32rtomb
 TESTS_C+=	t_digittoint
 TESTS_C+=	t_ducet
 TESTS_C+=	t_io
+TESTS_C+=	t_mbrtoc16
+TESTS_C+=	t_mbrtoc32
 TESTS_C+=	t_mbrtowc
 TESTS_C+=	t_mbsnrtowcs
 TESTS_C+=	t_mbstowcs

Added files:

Index: src/lib/libc/locale/c16rtomb.3
diff -u /dev/null src/lib/libc/locale/c16rtomb.3:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c16rtomb.3	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,199 @@
+.\"	$NetBSD: c16rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt C16RTOMB 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm c16rtomb
+.Nd Restartable UTF-16 code unit to multibyte conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn c16rtomb "char * restrict s" \
+"char16_t c16" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to encode Unicode input as a multibyte character
+sequence output at
+.Fa s
+in the current locale, writing anywhere between zero and
+.Dv MB_CUR_MAX
+bytes, inclusive, to
+.Fa s ,
+depending on the inputs and conversion state
+.Fa ps .
+.Pp
+The input
+.Fa c16
+is a UTF-16 code unit, which can be either:
+.Bl -bullet
+.It
+a Unicode scalar value in the Basic Multilingual Plane (BMP), that is,
+a 16-bit code unit outside the interval [0xd800,0xdfff]; or,
+.It
+over the course of two consecutive calls to
+.Nm ,
+the high and low surrogate code points of a Unicode scalar value
+outside the BMP.
+.El
+.Pp
+If a low surrogate code point, that is, a value of
+.Fa c16
+in [0xdc00,0xdfff], is passed to
+.Nm
+without the preceding call to it with the same
+.Fa ps
+having been passed a high surrogate code point, that is, a value of
+.Fa c16
+in [0xd800,0xdbff], or if a high surrogate was passed in the previous
+call and anything other than a low surrogate is passed, then
+.Nm
+will return
+.Li (size_t)-1
+to denote failure with
+.Xr errno 2
+set to
+.Er EILSEQ .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns the number of bytes written to
+.Fa s
+on success, or sets
+.Xr errno 2
+and returns
+.Li "(size_t)-1"
+on failure.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Convert a UTF-16 code unit sequence to a multibyte string,
+NUL-terminate it, and print it:
+.Bd -literal -offset indent
+char16_t c16[] = { 0xd83d, 0xdca9 };
+char buf[__arraycount(c16)*MB_CUR_MAX + 1], *s = buf;
+size_t i;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+for (i = 0; i < __arraycount(c16); i++) {
+	size_t len;
+
+	len = c16rtomb(s, c16[i], &mbs);
+	if (len == (size_t)-1)
+		err(1, "c16rtomb");
+	assert(len <= sizeof(buf) - (s - buf));
+	s += len;
+}
+*s = '\e0';		/* NUL-terminate */
+printf("%s\n", buf);
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+A surrogate code point was passed as
+.Fa c16
+when it is inappropriate.
+.It Bq Er EILSEQ
+The Unicode scalar value requested cannot be encoded as a multibyte
+sequence in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh BUGS
+It is not clear from the standard how
+.Nm
+is supposed to behave when given a high surrogate code point followed
+by a NUL:
+.Bd -literal -offset indent
+c16rtomb(s, 0xd800, ps);
+c16rtomb(s, L'\e0', ps);
+.Ed
+.Pp
+Currently this fails with
+.Er EILSEQ
+which matches other implementations, but this is at odds with language
+in the standard which suggests that passing
+.Li L'\e0'
+should unconditionally store a null byte and reset
+.Fa ps
+to the initial conversion state:
+.Bd -filled -offset indent
+If
+.Fa c16
+is a null wide character, a null byte is stored, preceded by any shift
+sequence needed to restore the initial shift state; the resulting state
+described is the initial conversion state.
+.Ed
+.Pp
+However, it is unclear what else this should store besides a null
+byte.
+Should it discard the pending high surrogate, or convert it to
+something else and store that?
Index: src/lib/libc/locale/c16rtomb.c
diff -u /dev/null src/lib/libc/locale/c16rtomb.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c16rtomb.c	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,181 @@
+/*	$NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c16rtomb(s, c16, ps)
+ *
+ *	Encode the Unicode UTF-16 code unit c16, which may be a
+ *	surrogate code point, into the multibyte buffer s under the
+ *	current locale, using multibyte encoding state ps.
+ *
+ *	If c16 is a high surrogate, no output will be produced, but c16
+ *	will be remembered; this must be followed by another call
+ *	passing the trailing low surrogate.
+ *
+ *	If c16 is a low surrogate, it must have been preceded by a call
+ *	with the leading high surrogate; at this point the combined
+ *	scalar value will be produced as output.
+ *
+ *	Return the number of bytes stored on success, or (size_t)-1 on
+ *	error with errno set to EILSEQ.
+ *
+ *	At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ *	p. 124.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ *	P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ *	RFC 2781, Internet Engineering Task Force, February 2000,
+ *	Sec. 2.2: `Decoding UTF-16'.
+ *	https://datatracker.ietf.org/doc/html/rfc2781#section-2.2
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "c32rtomb.h"
+
+struct c16rtombstate {
+	char16_t	surrogate;	/* pending high surrogate; 0 if none */
+	mbstate_t	mbs;		/* state passed on to c32rtomb */
+};
+__CTASSERT(offsetof(struct c16rtombstate, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct c32rtombstate) <= sizeof(mbstate_t) -
+    offsetof(struct c16rtombstate, mbs));
+__CTASSERT(_Alignof(struct c16rtombstate) <= _Alignof(mbstate_t));
+
+size_t
+c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;		/* used when ps == NULL */
+	char buf[MB_LEN_MAX];		/* scratch output when s == NULL */
+	struct c16rtombstate *S;
+	char32_t c32;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 *  internal mbstate_t object instead, which is initialized at
+	 *  program startup to the initial conversion state; the
+	 *  functions are not required to avoid data races with other
+	 *  calls to the same function in this case.  The
+	 *  implementation behaves as if no library function calls
+	 *  these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the c16rtomb function is equivalent
+	 *  to the call
+	 *
+	 *	c16rtomb(buf, L'\0', ps)
+	 *
+	 *  where buf is an internal buffer.'
+	 */
+	if (s == NULL) {
+		s = buf;
+		c16 = L'\0';
+	}
+
+	/*
+	 * Open the private UTF-16 decoding state.
+	 */
+	S = (struct c16rtombstate *)ps;
+
+#if 0
+	/*
+	 * `If c16 is a null wide character, a null byte is stored,
+	 *  preceded by any shift sequence needed to restore the
+	 *  initial shift state; the resulting state described is the
+	 *  initial conversion state.'
+	 *
+	 * XXX But what else gets stored?  Do we just discard any
+	 * pending high surrogate, or do we convert it to something
+	 * else, or what?
+	 */
+	if (c16 == L'\0') {
+		S->surrogate = 0;
+	}
+#endif
+
+	/*
+	 * Check whether:
+	 *
+	 * 1. We had previously decoded a high surrogate.
+	 *    => Decode the low surrogate -- reject if it's not a low
+	 *       surrogate -- and combine them to output a scalar
+	 *       value; clear the high surrogate for next time.
+	 * 2. This is a high surrogate.
+	 *    => Save it and wait for the low surrogate with no output.
+	 * 3. This is a low surrogate.
+	 *    => Reject.
+	 * 4. This is not a surrogate.
+	 *    => Output a scalar value.
+	 */
+	if (S->surrogate != 0) {	/* 1. pending surrogate pair */
+		if (c16 < 0xdc00 || c16 > 0xdfff) {
+			errno = EILSEQ;
+			return (size_t)-1;
+		}
+		const char16_t w1 = S->surrogate;
+		const char16_t w2 = c16;
+		c32 = __SHIFTIN(__SHIFTOUT(w1, __BITS(9,0)), __BITS(19,10)) |
+		    __SHIFTIN(__SHIFTOUT(w2, __BITS(9,0)), __BITS(9,0));
+		c32 += 0x10000;		/* 20-bit pair value is an offset from U+10000 */
+		S->surrogate = 0;
+	} else if (c16 >= 0xd800 && c16 <= 0xdbff) { /* 2. high surrogate */
+		S->surrogate = c16;
+		return 0;	/* produced nothing */
+	} else if (c16 >= 0xdc00 && c16 <= 0xdfff) { /* 3. low surrogate */
+		errno = EILSEQ;
+		return (size_t)-1;
+	} else {		/* 4. not a surrogate */
+		c32 = c16;
+	}
+
+	/*
+	 * We have a scalar value.  Output it.
+	 */
+	return c32rtomb(s, c32, &S->mbs);
+}
Index: src/lib/libc/locale/c32rtomb.3
diff -u /dev/null src/lib/libc/locale/c32rtomb.3:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.3	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,141 @@
+.\"	$NetBSD: c32rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt C32RTOMB 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm c32rtomb
+.Nd Restartable UTF-32 code unit to multibyte conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn c32rtomb "char * restrict s" \
+"char32_t c32" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to encode Unicode input as a multibyte character
+sequence output at
+.Fa s
+in the current locale, writing anywhere between zero and
+.Dv MB_CUR_MAX
+bytes, inclusive, to
+.Fa s ,
+depending on the inputs and conversion state
+.Fa ps .
+.Pp
+The input
+.Fa c32
+is a UTF-32 code unit, which represents a single Unicode scalar value,
+i.e., a Unicode code point that is not in the interval [0xd800,0xdfff]
+of surrogate code points.
+.Pp
+If a surrogate code point is passed,
+.Nm
+will return
+.Li (size_t)-1
+to denote failure with
+.Xr errno 2
+set to
+.Er EILSEQ .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns the number of bytes written to
+.Fa s
+on success, or sets
+.Xr errno 2
+and returns
+.Li "(size_t)-1"
+on failure.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Convert a sequence of Unicode scalar values to a multibyte sequence,
+NUL-terminate it, and print it:
+.Bd -literal -offset indent
+char32_t c32[] = { 0x1f4a9, 0x20ac, 0x21 };
+char buf[__arraycount(c32)*MB_CUR_MAX + 1], *s = buf;
+size_t i;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+for (i = 0; i < __arraycount(c32); i++) {
+	size_t len;
+
+	len = c32rtomb(s, c32[i], &mbs);
+	if (len == (size_t)-1)
+		err(1, "c32rtomb");
+	assert(len <= sizeof(buf) - (s - buf));
+	s += len;
+}
+*s = '\e0';		/* NUL-terminate */
+printf("%s\n", buf);
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+A surrogate code point was passed as
+.Fa c32 .
+.It Bq Er EILSEQ
+The Unicode scalar value requested cannot be encoded as a multibyte
+sequence in the current locale.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/c32rtomb.c
diff -u /dev/null src/lib/libc/locale/c32rtomb.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.c	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,162 @@
+/*	$NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * c32rtomb(s, c32, ps)
+ *
+ *	Encode the Unicode UTF-32 code unit c32, which must not be a
+ *	surrogate code point, into the multibyte buffer s under the
+ *	current locale, using multibyte encoding state ps.  A UTF-32
+ *	code unit is also a Unicode scalar value, which is any Unicode
+ *	code point except a surrogate.
+ *
+ *	Return the number of bytes stored on success, or (size_t)-1 on
+ *	error with errno set (EILSEQ, EIO, or an iconv error code).
+ *
+ *	At most MB_CUR_MAX bytes will be stored.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <sys/types.h>		/* broken citrus_*.h */
+#include <sys/queue.h>		/* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h"	/* broken citrus_iconv.h */
+#include "citrus_module.h"	/* broken citrus_iconv.h */
+#include "citrus_hash.h"	/* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+size_t
+c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+	char buf[MB_LEN_MAX];		/* output sink when s == NULL */
+	struct _citrus_iconv *iconv = NULL;
+	char srcbuf[4];			/* c32 encoded as UTF-32LE */
+	const char *src;
+	char *dst;
+	size_t srcleft, dstleft, inval, len;
+	int error, errno_save;
+
+	/*
+	 * Save the caller's errno in case _citrus_iconv_* clobbers it.
+	 */
+	errno_save = errno;
+
+	/*
+	 * `If s is a null pointer, the c32rtomb function is equivalent
+	 *  to the call
+	 *
+	 *	c32rtomb(buf, L'\0', ps)
+	 *
+	 *  where buf is an internal buffer.'
+	 */
+	if (s == NULL) {
+		s = buf;
+		c32 = L'\0';
+	}
+
+	/*
+	 * Reject surrogates (0xd800..0xdfff); they are not scalar values.
+	 */
+	if (c32 >= 0xd800 && c32 <= 0xdfff) {
+		errno = EILSEQ;
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Open an iconv handle to convert UTF-32LE to the multibyte
+	 * encoding reported by nl_langinfo(CODESET).
+	 */
+	if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV, "utf-32le",
+		    nl_langinfo(CODESET))) != 0) {
+		errno = EIO; /* XXX? */
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Encode c32 as UTF-32LE in srcbuf and convert it, storing at s.
+	 */
+	le32enc(srcbuf, c32);
+	src = srcbuf;
+	srcleft = sizeof(srcbuf);
+	dst = s;
+	dstleft = MB_CUR_MAX;		/* never store more than this */
+	error = _citrus_iconv_convert(iconv,
+	    &src, &srcleft,
+	    &dst, &dstleft,
+	    _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+	if (error) {		/* can't be incomplete, must be error */
+		errno = error;
+		len = (size_t)-1;
+		goto out;
+	}
+	_DIAGASSERT(srcleft == 0);
+	_DIAGASSERT(dstleft <= MB_CUR_MAX);
+
+	/*
+	 * If we didn't produce any output, that means the scalar value
+	 * c32 can't be encoded in the current locale, so treat it as
+	 * EILSEQ.
+	 */
+	len = MB_CUR_MAX - dstleft;
+	if (len == 0) {
+		errno = EILSEQ;
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Success: put back the errno saved on entry.
+	 */
+	errno = errno_save;
+
+out:	errno_save = errno;		/* keep errno across the close */
+	_citrus_iconv_close(iconv);	/* may be NULL on early exit */
+	errno = errno_save;
+	return len;
+}
Index: src/lib/libc/locale/c32rtomb.h
diff -u /dev/null src/lib/libc/locale/c32rtomb.h:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/c32rtomb.h	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,36 @@
+/*	$NetBSD: c32rtomb.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	LIB_LIBC_LOCALE_C32RTOMB_H_
+#define	LIB_LIBC_LOCALE_C32RTOMB_H_
+
+struct c32rtombstate {
+	char		dummy;	/* placeholder member -- TODO confirm use */
+};
+
+#endif	/* LIB_LIBC_LOCALE_C32RTOMB_H_ */
Index: src/lib/libc/locale/mbrtoc16.3
diff -u /dev/null src/lib/libc/locale/mbrtoc16.3:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc16.3	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,304 @@
+.\"	$NetBSD: mbrtoc16.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt MBRTOC16 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm mbrtoc16
+.Nd Restartable multibyte to UTF-16 code unit conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn mbrtoc16 "char16_t * restrict pc16" \
+"const char * restrict s" \
+"size_t n" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to decode a multibyte character sequence at
+.Fa s
+of up to
+.Fa n
+bytes in the current locale, and yield the content as UTF-16 code
+units via the output parameter
+.Fa pc16 .
+.Fa pc16
+may be null, in which case no output is stored.
+.Bl -bullet
+.It
+If the multibyte sequence at
+.Fa s
+is invalid or an error occurs in decoding,
+.Nm
+returns
+.Li (size_t)-1
+and sets
+.Xr errno 2
+to indicate the error.
+.It
+If the multibyte sequence at
+.Fa s
+is still incomplete after
+.Fa n
+bytes, including any previously processed input saved in
+.Fa ps ,
+.Nm
+saves its state in
+.Fa ps
+after all the input so far and returns
+.Li "(size_t)-2" .
+.It
+If
+.Nm
+finds the null scalar value at
+.Fa s ,
+then it stores zero at
+.Li * Ns Fa pc16
+and returns zero.
+.It
+If
+.Nm
+finds a nonnull scalar value in the Basic Multilingual Plane, i.e., a
+16-bit scalar value, then it stores the scalar value at
+.Li * Ns Fa pc16 ,
+and returns the number of bytes it read from the input.
+.It
+If
+.Nm
+finds a scalar value outside the Basic Multilingual Plane (BMP), then
+it:
+.Bl -dash -compact
+.It
+stores the scalar value's high surrogate code point at
+.Li * Ns Fa pc16 ;
+.It
+stores conversion state in
+.Fa ps
+to remember the rest of the pending scalar value; and
+.It
+returns the number of bytes it read from the input.
+.El
+.It
+If
+.Nm
+had previously found a scalar value outside the BMP, then, instead of
+any of the above options, it:
+.Bl -dash -compact
+.It
+stores the scalar value's low surrogate code point at
+.Li * Ns Fa pc16 ;
+.It
+consumes the rest of the pending scalar value from the conversion state
+.Fa ps ;
+and
+.It
+returns
+.Li (size_t)-3
+to indicate that no bytes were consumed but a code unit was yielded
+nevertheless.
+.El
+.El
+.Pp
+If
+.Fa s
+is a null pointer, the
+.Nm
+call is equivalent to:
+.Bd -ragged -offset indent
+.Fo mbrtoc16
+.Li NULL ,
+.Li \*q\*q ,
+.Li 1 ,
+.Fa ps
+.Fc
+.Ed
+.Pp
+This always returns zero, and has the effect of resetting
+.Fa ps
+to the initial conversion state, without writing to
+.Fa pc16 ,
+even if it is nonnull.
+.Pp
+If
+.Fa ps
+is a null pointer,
+.Nm
+uses an internal
+.Vt mbstate_t
+object with static storage duration, distinct from all other
+.Vt mbstate_t
+objects (including those used by
+.Xr mbrtoc32 3 ,
+.Xr c16rtomb 3 ,
+and
+.Xr c32rtomb 3 ) ,
+which is initialized at program startup to the initial conversion
+state.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns:
+.Bl -tag -width ".Li (size_t)-3" -offset indent
+.It Li 0
+[null]
+if within the next
+.Fa n
+bytes at
+.Fa s
+the first multibyte character is null.
+.It Fa i
+[code unit]
+where
+.Li 0
+\*(Le
+.Fa i
+\*(Le
+.Fa n ,
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded a surrogate code point, and within the first
+.Fa i
+bytes at
+.Fa s
+a Unicode scalar value was decoded.
+.It Li (size_t)-3
+[continuation]
+if the previous call to
+.Nm
+with
+.Fa ps
+had yielded a high surrogate code point for a Unicode scalar value
+outside the Basic Multilingual Plane; no additional input is consumed
+in this case.
+.It Li (size_t)-2
+[incomplete]
+if either
+.Fa ps
+is in the initial conversion state or the previous call to
+.Nm
+with
+.Fa ps
+had not yielded a surrogate code point, and within the first
+.Fa n
+bytes at
+.Fa s ,
+including any previously buffered input, no complete Unicode scalar
+value could be decoded.
+.It Li (size_t)-1
+[error]
+if any encoding error was detected;
+.Xr errno 2
+is set to reflect the error.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+Print the UTF-16 code units of a multibyte string in hexadecimal text:
+.Bd -literal -offset indent
+char *s = ...;
+size_t n = ...;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+while (n) {
+	char16_t c16;
+	size_t len;
+
+	len = mbrtoc16(&c16, s, n, &mbs);
+	switch (len) {
+	case 0:		/* null terminator */
+		assert(c16 == L'\e0');
+		goto out;
+	default:	/* scalar value or high surrogate */
+		printf("U+%04"PRIx16"\en", (uint16_t)c16);
+		break;
+	case (size_t)-3: /* low surrogate */
+		printf("continue U+%04"PRIx16"\en", (uint16_t)c16);
+		continue;	/* no input consumed: skip s/n update */
+	case (size_t)-2: /* incomplete */
+		printf("incomplete\en");
+		goto readmore;
+	case (size_t)-1: /* error */
+		printf("error: %d\en", errno);
+		goto out;
+	}
+	s += len;
+	n -= len;
+}
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The multibyte sequence cannot be decoded as a Unicode scalar value.
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc32 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/mbrtoc16.c
diff -u /dev/null src/lib/libc/locale/mbrtoc16.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc16.c	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,192 @@
+/*	$NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc16(&c16, s, n, ps)
+ *
+ *	Decode a Unicode scalar value from up to n bytes out of the
+ *	multibyte string s, using multibyte encoding state ps, and
+ *	store the next code unit in the UTF-16 representation of that
+ *	scalar value at c16.
+ *
+ *	If the next scalar value in s is outside the Basic Multilingual
+ *	Plane, mbrtoc16 will yield the high surrogate code point in one
+ *	call that consumes input, and will yield the low surrogate code
+ *	point in the next call without consuming any input and
+ *	returning (size_t)-3 instead.
+ *
+ *	Return the number of bytes consumed on success, or:
+ *
+ *	- 0 if the code unit is NUL, or
+ *	- (size_t)-3 if the trailing low surrogate of a surrogate pair
+ *	  was returned without consuming any additional input, or
+ *	- (size_t)-2 if the input is incomplete, or
+ *	- (size_t)-1 on error with errno set to EILSEQ.
+ *
+ *	In the case of incomplete input, the decoding state so far
+ *	after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ *	subsequent calls to mbrtoc16 will pick up n bytes later into
+ *	the input stream.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16,
+ *	p. 124.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150
+ *
+ *	P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646',
+ *	RFC 2781, Internet Engineering Task Force, February 2000,
+ *	Sec. 2.1: `Encoding UTF-16'.
+ *	https://datatracker.ietf.org/doc/html/rfc2781#section-2.1
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <uchar.h>
+
+#include "mbrtoc32.h"
+
+struct mbrtoc16state {
+	char16_t	surrogate;	/* pending low surrogate, else 0 */
+	mbstate_t	mbs;		/* state passed on to mbrtoc32 */
+};
+__CTASSERT(offsetof(struct mbrtoc16state, mbs) <= sizeof(mbstate_t));
+__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t) -
+    offsetof(struct mbrtoc16state, mbs));
+__CTASSERT(_Alignof(struct mbrtoc16state) <= _Alignof(mbstate_t));
+
+size_t
+mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;		/* state used when ps == NULL */
+	struct mbrtoc16state *S;	/* ps viewed as private state */
+	char32_t c32;			/* value decoded by mbrtoc32 */
+	size_t len;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 *  internal mbstate_t object instead, which is initialized at
+	 *  program startup to the initial conversion state; the
+	 *  functions are not required to avoid data races with other
+	 *  calls to the same function in this case.  The
+	 *  implementation behaves as if no library function calls
+	 *  these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the mbrtoc16 function is equivalent
+	 *  to the call:
+	 *
+	 *	mbrtoc16(NULL, "", 1, ps)
+	 *
+	 *  In this case, the values of the parameters pc16 and n are
+	 *  ignored.'
+	 */
+	if (s == NULL) {
+		pc16 = NULL;
+		s = "";
+		n = 1;
+	}
+
+	/*
+	 * Get the private conversion state.
+	 */
+	S = (struct mbrtoc16state *)ps;
+
+	/*
+	 * If there is a pending low surrogate, yield it and consume no
+	 * bytes of the input, returning (size_t)-3 to indicate that no
+	 * bytes of input were consumed.
+	 */
+	if (S->surrogate >= 0xdc00 && S->surrogate <= 0xdfff) {
+		if (pc16)
+			*pc16 = S->surrogate;
+		S->surrogate = 0;	/* nothing pending any more */
+		return (size_t)-3;
+	}
+
+	/*
+	 * Consume the next scalar value.  If no full scalar value can
+	 * be obtained, stop here.
+	 */
+	len = mbrtoc32(&c32, s, n, &S->mbs);
+	switch (len) {
+	case 0:			/* NUL */
+		if (pc16)
+			*pc16 = 0;
+		return 0;
+	case (size_t)-2:	/* still incomplete after n bytes */
+	case (size_t)-1:	/* error */
+		return len;
+	default:		/* consumed len bytes of input */
+		break;
+	}
+
+	/*
+	 * We consumed a scalar value from the input.
+	 *
+	 * If it's inside the Basic Multilingual Plane (16-bit scalar
+	 * values), return it.
+	 *
+	 * If it's outside the Basic Multilingual Plane, split it into
+	 * high and low surrogate code points, return the high, and
+	 * save the low.
+	 */
+	if (c32 <= 0xffff) {
+		if (pc16)
+			*pc16 = c32;
+		_DIAGASSERT(S->surrogate == 0);
+	} else {
+		c32 -= 0x10000;		/* split the rest into 10+10 bits */
+		const char16_t w1 = 0xd800 | __SHIFTOUT(c32, __BITS(19,10));
+		const char16_t w2 = 0xdc00 | __SHIFTOUT(c32, __BITS(9,0));
+		if (pc16)
+			*pc16 = w1;
+		S->surrogate = w2;	/* yielded by the next call */
+		_DIAGASSERT(S->surrogate != 0);
+	}
+
+	/*
+	 * Return the number of bytes consumed from the input.
+	 */
+	return len;
+}
Index: src/lib/libc/locale/mbrtoc32.3
diff -u /dev/null src/lib/libc/locale/mbrtoc32.3:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.3	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,236 @@
+.\"	$NetBSD: mbrtoc32.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt MBRTOC32 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm mbrtoc32
+.Nd Restartable multibyte to UTF-32 code unit conversion
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh LIBRARY
+.Lb libc
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.Ft size_t
+.Fn mbrtoc32 "char32_t * restrict pc32" \
+"const char * restrict s" \
+"size_t n" \
+"mbstate_t * restrict ps"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+function attempts to decode a multibyte character sequence at
+.Fa s
+of up to
+.Fa n
+bytes in the current locale, and yield the content as UTF-32 code
+units, i.e., Unicode scalar values, via the output parameter
+.Fa pc32 .
+.Fa pc32
+may be null, in which case no output is stored.
+.Bl -bullet
+.It
+If the multibyte sequence at
+.Fa s
+is invalid or an error occurs in decoding,
+.Nm
+returns
+.Li (size_t)-1
+and sets
+.Xr errno 2
+to indicate the error.
+.It
+If the multibyte sequence at
+.Fa s
+is still incomplete after
+.Fa n
+bytes, including any previously processed input saved in
+.Fa ps ,
+.Nm
+saves its state in
+.Fa ps
+after all the input so far and returns
+.Li "(size_t)-2" .
+.It
+If
+.Nm
+finds the null scalar value at
+.Fa s ,
+then it stores zero at
+.Li * Ns Fa pc32
+and returns zero.
+.It
+If
+.Nm
+finds a nonnull scalar value, then it stores the scalar value at
+.Li * Ns Fa pc32 ,
+and returns the number of bytes it read from the input.
+.El
+.Pp
+If
+.Fa s
+is a null pointer, the
+.Nm
+call is equivalent to:
+.Bd -ragged -offset indent
+.Fo mbrtoc32
+.Li NULL ,
+.Li \*q\*q ,
+.Li 1 ,
+.Fa ps
+.Fc
+.Ed
+.Pp
+This always returns zero, and has the effect of resetting
+.Fa ps
+to the initial conversion state, without writing to
+.Fa pc32 ,
+even if it is nonnull.
+.Pp
+If
+.Fa ps
+is a null pointer,
+.Nm
+uses an internal
+.Vt mbstate_t
+object with static storage duration, distinct from all other
+.Vt mbstate_t
+objects (including those used by
+.Xr mbrtoc16 3 ,
+.Xr c16rtomb 3 ,
+and
+.Xr c32rtomb 3 ) ,
+which is initialized at program startup to the initial conversion
+state.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh RETURN VALUES
+The
+.Nm
+function returns:
+.Bl -tag -width ".Li (size_t)-3" -offset indent
+.It Li 0
+[null]
+if within the next
+.Fa n
+bytes at
+.Fa s
+the first multibyte character is null.
+.It Fa i
+[scalar value]
+where
+.Li 0
+\*(Le
+.Fa i
+\*(Le
+.Fa n ,
+if within the first
+.Fa i
+bytes at
+.Fa s
+a Unicode scalar value was decoded.
+.It Li (size_t)-2
+[incomplete]
+if within the first
+.Fa n
+bytes at
+.Fa s
+no complete Unicode scalar value could be decoded.
+.It Li (size_t)-1
+[error]
+if any encoding error was detected;
+.Xr errno 2
+is set to reflect the error.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh EXAMPLES
+.Bd -literal
+char *s = ...;
+size_t n = ...;
+mbstate_t mbs = {0};	/* initial conversion state */
+
+while (n) {
+	char32_t c32;
+	size_t len;
+
+	len = mbrtoc32(&c32, s, n, &mbs);
+	switch (len) {
+	case 0:			/* null terminator */
+		assert(c32 == L'\e0');
+		goto out;
+	default:		/* scalar value */
+		printf("U+%04"PRIx32"\en", (uint32_t)c32);
+		break;
+	case (size_t)-2:	/* incomplete */
+		printf("incomplete\en");
+		goto readmore;
+	case (size_t)-1:	/* error */
+		printf("error: %d\en", errno);
+		goto out;
+	}
+	s += len;
+	n -= len;
+}
+.Ed
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh ERRORS
+.Bl -tag -width ".Bq Er EILSEQ"
+.It Bq Er EILSEQ
+The multibyte sequence at
+.Fa s
+cannot be decoded as a Unicode scalar value in the current locale
+(for example, because it is invalid in the locale's encoding).
+.It Bq Er EIO
+An error occurred in loading the locale's character conversions.
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr uchar 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.Nm
+function conforms to
+.St -isoC-2011 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.Nm
+function first appeared in
+.Nx 11.0 .
Index: src/lib/libc/locale/mbrtoc32.c
diff -u /dev/null src/lib/libc/locale/mbrtoc32.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.c	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,237 @@
+/*	$NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * mbrtoc32(&c32, s, n, ps)
+ *
+ *	Decode a Unicode UTF-32 code unit from up to n bytes out of the
+ *	multibyte string s, and store it at c32, using multibyte
+ *	encoding state ps.  A UTF-32 code unit is also a Unicode scalar
+ *	value, which is any Unicode code point except a surrogate.
+ *
+ *	Return the number of bytes consumed on success, or 0 if the
+ *	code unit is NUL, or (size_t)-2 if the input is incomplete, or
+ *	(size_t)-1 on error with errno set (e.g., to EILSEQ or EIO).
+ *
+ *	In the case of incomplete input, the decoding state so far
+ *	after processing s[0], s[1], ..., s[n - 1] is saved in ps, so
+ *	subsequent calls to mbrtoc32 will pick up n bytes later into
+ *	the input stream.
+ *
+ * References:
+ *
+ *	The Unicode Standard, Version 15.0 -- Core Specification, The
+ *	Unicode Consortium, Sec. 3.8 `Surrogates', p. 119.
+ *	https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ *	https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $");
+
+#include <sys/param.h>		/* MIN */
+#include <sys/types.h>		/* broken citrus_*.h */
+#include <sys/queue.h>		/* broken citrus_*.h */
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <paths.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uchar.h>
+#include <wchar.h>
+
+#include "citrus_types.h"	/* broken citrus_iconv.h */
+#include "citrus_module.h"	/* broken citrus_iconv.h */
+#include "citrus_hash.h"	/* broken citrus_iconv.h */
+#include "citrus_iconv.h"
+
+#include "mbrtoc32.h"
+
+__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t));
+__CTASSERT(_Alignof(struct mbrtoc32state) <= _Alignof(mbstate_t));
+
+size_t
+mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n,
+    mbstate_t *restrict ps)
+{
+	static mbstate_t psbuf;
+	struct mbrtoc32state *S;
+	struct _citrus_iconv *iconv = NULL;
+	size_t len;
+	char32_t c32;
+	int error, errno_save;
+
+	/*
+	 * Save errno in case _citrus_iconv_* clobbers it.
+	 */
+	errno_save = errno;
+
+	/*
+	 * `If ps is a null pointer, each function uses its own
+	 *  internal mbstate_t object instead, which is initialized at
+	 *  program startup to the initial conversion state; the
+	 *  functions are not required to avoid data races with other
+	 *  calls to the same function in this case.  The
+	 *  implementation behaves as if no library function calls
+	 *  these functions with a null pointer for ps.'
+	 */
+	if (ps == NULL)
+		ps = &psbuf;
+
+	/*
+	 * `If s is a null pointer, the mbrtoc32 function is equivalent
+	 *  to the call:
+	 *
+	 *	mbrtoc32(NULL, "", 1, ps)
+	 *
+	 *  In this case, the values of the parameters pc32 and n are
+	 *  ignored.'
+	 */
+	if (s == NULL) {
+		pc32 = NULL;
+		s = "";
+		n = 1;
+	}
+
+	/*
+	 * Get the private conversion state.
+	 */
+	S = (struct mbrtoc32state *)ps;
+
+	/*
+	 * If input length is zero, the result is always incomplete by
+	 * definition.  Don't bother with iconv -- we'd have to
+	 * disentangle truncated outputs.
+	 */
+	if (n == 0) {
+		len = (size_t)-2;
+		goto out;
+	}
+
+	/*
+	 * Reset the destination buffer if this is the initial state.
+	 */
+	if (S->dstleft == 0)
+		S->dstleft = sizeof(S->dstbuf);
+
+	/*
+	 * Open an iconv handle to convert locale-dependent multibyte
+	 * input to UTF-32LE.
+	 */
+	if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV,
+		    nl_langinfo(CODESET), "utf-32le")) != 0) {
+		errno = EIO; /* XXX? */
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Try to iconv a minimal prefix, one byte at a time, while input
+	 * and room in srcbuf remain.  On success, goto ok with len set.
+	 */
+	for (len = 0; len < n && S->nsrc < sizeof(S->srcbuf);) {
+		const char *src = S->srcbuf;
+		size_t srcleft;
+		char *dst = S->dstbuf + sizeof(S->dstbuf) - S->dstleft;
+		size_t inval;
+
+		S->srcbuf[S->nsrc++] = s[len++];
+		srcleft = S->nsrc;
+
+		error = _citrus_iconv_convert(iconv,
+		    &src, &srcleft,
+		    &dst, &S->dstleft,
+		    _CITRUS_ICONV_F_HIDE_INVALID, &inval);
+		if (error != EINVAL) {
+			if (error == 0)
+				goto ok;
+			errno = error;
+			len = (size_t)-1;
+			goto out;
+		}
+	}
+
+	/*
+	 * No complete code point yet.  If all n bytes were consumed,
+	 * return (size_t)-2 and let the caller try again.  Otherwise
+	 * srcbuf filled up first: no valid sequence is that long.
+	 */
+	if (len < n)
+		errno = EILSEQ;
+	len = (len < n ? (size_t)-1 : (size_t)-2);
+	goto out;
+
+ok:	/*
+	 * Successfully converted a minimal byte sequence, which should
+	 * produce exactly one UTF-32 code unit, encoded in
+	 * little-endian, representing a code point.  Get the code
+	 * point.
+	 */
+	c32 = le32dec(S->dstbuf);
+
+	/*
+	 * Reject surrogate code points.  We only deal in scalar
+	 * values.
+	 *
+	 * XXX Is this necessary?  Won't iconv take care of it for us?
+	 */
+	if (c32 >= 0xd800 && c32 <= 0xdfff) {
+		errno = EILSEQ;
+		len = (size_t)-1;
+		goto out;
+	}
+
+	/*
+	 * Non-surrogate code point -- scalar value.  Yield it.
+	 */
+	if (pc32)
+		*pc32 = c32;
+
+	/*
+	 * If we got the null scalar value, return zero length, as the
+	 * contract requires.
+	 */
+	if (c32 == 0)
+		len = 0;
+
+	/*
+	 * Make sure we preserve errno on success.
+	 */
+	errno = errno_save;
+
+out:	if (len != (size_t)-2)
+		memset(S, 0, sizeof(*S));	/* back to the initial state */
+	errno_save = errno;
+	_citrus_iconv_close(iconv);
+	errno = errno_save;
+	return len;
+}
Index: src/lib/libc/locale/mbrtoc32.h
diff -u /dev/null src/lib/libc/locale/mbrtoc32.h:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/lib/libc/locale/mbrtoc32.h	Thu Aug 15 14:16:33 2024
@@ -0,0 +1,42 @@
+/*	$NetBSD: mbrtoc32.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	LIB_LIBC_LOCALE_MBRTOC32_H_
+#define	LIB_LIBC_LOCALE_MBRTOC32_H_
+
+#include <limits.h>
+#include <uchar.h>
+
+struct mbrtoc32state {
+	char			srcbuf[MB_LEN_MAX];	/* pending input */
+	size_t			nsrc;			/* bytes in srcbuf */
+	char			dstbuf[4];		/* UTF-32LE output */
+	size_t			dstleft;		/* dstbuf space left */
+};
+
+#endif	/* LIB_LIBC_LOCALE_MBRTOC32_H_ */

Index: src/share/man/man3/uchar.3
diff -u /dev/null src/share/man/man3/uchar.3:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/share/man/man3/uchar.3	Thu Aug 15 14:16:34 2024
@@ -0,0 +1,131 @@
+.\"	$NetBSD: uchar.3,v 1.1 2024/08/15 14:16:34 riastradh Exp $
+.\"
+.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 14, 2024
+.Dt UCHAR 3
+.Os
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh NAME
+.Nm uchar
+.Nd Unicode utilities
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In uchar.h
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.In uchar.h
+header file declares types and functions for manipulating Unicode code
+units.
+.\""""""""""""""""""""""""""""""""""""""
+.Ss Types
+.Bl -tag -width ".Vt char32_t"
+.It Vt char16_t
+Unsigned integer type for UTF-16 code units.
+.Pp
+Same type as
+.Vt uint_least16_t
+from
+.In stdint.h .
+May represent both surrogate code points, i.e., code points in the
+interval [0xd800,0xdfff], and Unicode scalar values in the Basic
+Multilingual Plane, which are the 16-bit code points other than
+surrogate code points.
+.It Vt char32_t
+Unsigned integer type for UTF-32 code units.
+.Pp
+Same type as
+.Vt uint_least32_t
+from
+.In stdint.h .
+Can represent all Unicode scalar values, not just those in the Basic
+Multilingual Plane.
+Intended to represent only Unicode scalar values, not surrogate code
+points.
+.It Vt mbstate_t
+Opaque multibyte conversion state.
+.Pp
+Same type as in
+.In stddef.h
+and
+.In wchar.h .
+.It Vt size_t
+Unsigned integer type to represent array sizes.
+.Pp
+Same type as in
+.In stddef.h ,
+.In stdint.h ,
+and
+.In sys/types.h .
+.El
+.\""""""""""""""""""""""""""""""""""""""
+.Ss Functions
+The
+.In uchar.h
+header file declares the functions
+.Xr mbrtoc16 3 ,
+.Xr c16rtomb 3 ,
+.Xr mbrtoc32 3 ,
+and
+.Xr c32rtomb 3
+for conversion between multibyte sequences and UTF-16/UTF-32 code
+units.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr c16rtomb 3 ,
+.Xr c32rtomb 3 ,
+.Xr mbrtoc16 3 ,
+.Xr mbrtoc32 3
+.Rs
+.%B The Unicode Standard
+.%O Version 15.0 \(em Core Specification
+.%Q The Unicode Consortium
+.%D September 2022
+.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
+.Re
+.Rs
+.%A P. Hoffman
+.%A F. Yergeau
+.%T UTF-16, an encoding of ISO 10646
+.%R RFC 2781
+.%D February 2000
+.%I Internet Engineering Task Force
+.%U https://datatracker.ietf.org/doc/html/rfc2781
+.Re
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh STANDARDS
+The
+.In uchar.h
+header file conforms to
+.St -isoC-2011
+and
+.St -p1003.1-2024 .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh HISTORY
+The
+.In uchar.h
+header file first appeared in
+.Nx 11.0 .

Index: src/tests/lib/libc/locale/t_c16rtomb.c
diff -u /dev/null src/tests/lib/libc/locale/t_c16rtomb.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_c16rtomb.c	Thu Aug 15 14:16:34 2024
@@ -0,0 +1,187 @@
+/*	$NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <e...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for c16rtomb() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+	char *lc_ctype_set;
+	/* Select locale_name for LC_CTYPE; fail the test if that fails. */
+	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+	if (lc_ctype_set == NULL)
+		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+		    locale_name, errno);
+	/* The name setlocale() reports must match the one requested. */
+	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char buf[MB_LEN_MAX + 1];
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
+ATF_TC_BODY(c16rtomb_c_locale_test, tc)
+{
+	size_t n;
+	/* Exercise c16rtomb() in the "C" locale. */
+	require_lc_ctype("C");
+
+	/*
+	 * If the buffer argument is NULL, c16 is implicitly 0,
+	 * c16rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
+
+	/* NUL: expect one zero byte, with the 0xcc fill after it intact. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* Latin letter A, null buffer and internal (null ps) state. */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
+
+	/* Latin letter A: expect the single byte 'A' and nothing more. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* 'Pile of poo' surrogate pair: expect EILSEQ, buf untouched. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
+ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
+{
+	size_t n;
+	/* Exercise c16rtomb() in an ISO 8859-1 locale. */
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* Euro sign (0x20ac): expect EILSEQ and buf left untouched. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test);
+ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc)
+{
+	size_t n;
+	/* Exercise c16rtomb() in an ISO 8859-15 locale. */
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* Euro sign (0x20ac): expect the single byte 0xa4. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+}
+
+ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test);
+ATF_TC_BODY(c16rtomb_utf_8_test, tc)
+{
+	size_t n;
+	/* Exercise c16rtomb() in a UTF-8 locale. */
+	require_lc_ctype("en_US.UTF-8");
+
+	/* 'Pile of poo' surrogate pair: 0 bytes, then 4 bytes of UTF-8. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 &&
+		(unsigned char)buf[1] == 0x9f &&
+		(unsigned char)buf[2] == 0x92 &&
+		(unsigned char)buf[3] == 0xa9 &&
+		(unsigned char)buf[4] == 0xcc),
+	    "buf=[%02x %02x %02x %02x %02x]",
+	    buf[0], buf[1], buf[2], buf[3], buf[4]);
+
+	/* Invalid code; 'Pile of poo' without the trail surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+
+	/* Invalid code; 'Pile of poo' without the lead surrogate. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno);
+	ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/* Register the c16rtomb() test cases with the test program. */
+	ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
+	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
+	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
+	ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
+
+	return (atf_no_error());
+}
Index: src/tests/lib/libc/locale/t_c32rtomb.c
diff -u /dev/null src/tests/lib/libc/locale/t_c32rtomb.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_c32rtomb.c	Thu Aug 15 14:16:34 2024
@@ -0,0 +1,60 @@
+/*	$NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(c32rtomb_null);
+ATF_TC_HEAD(c32rtomb_null, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test null string output to c32rtomb");
+}
+ATF_TC_BODY(c32rtomb_null, tc)
+{
+	char *locale;
+	mbstate_t ps = {0};
+	size_t n;
+	/* Run in the "C" locale (REQUIRE_LIBC: presumably h_macros.h). */
+	REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL);
+	ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale);
+	/* A null output pointer is still expected to return 1 here. */
+	ATF_CHECK_EQ_MSG((n = c32rtomb(NULL, L'x', &ps)), 1, "n=%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/* Register the c32rtomb() test case with the test program. */
+	ATF_TP_ADD_TC(tp, c32rtomb_null);
+	return atf_no_error();
+}
Index: src/tests/lib/libc/locale/t_mbrtoc16.c
diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc16.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_mbrtoc16.c	Thu Aug 15 14:16:34 2024
@@ -0,0 +1,241 @@
+/*	$NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Copyright (c) 2013 Ed Schouten <e...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <string.h>
+#include <uchar.h>
+
+#include <atf-c.h>
+
+static void
+require_lc_ctype(const char *locale_name)
+{
+	char *lc_ctype_set;
+	/* Select locale_name for LC_CTYPE; fail the test if that fails. */
+	lc_ctype_set = setlocale(LC_CTYPE, locale_name);
+	if (lc_ctype_set == NULL)
+		atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d",
+		    locale_name, errno);
+	/* The name setlocale() reports must match the one requested. */
+	ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0,
+	    "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name);
+}
+
+static mbstate_t s;
+static char16_t c16;
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test);
+ATF_TC_BODY(mbrtoc16_c_locale_test, tc)
+{
+	size_t n;
+	/* Exercise mbrtoc16() in the "C" locale. */
+	require_lc_ctype("C");
+
+	/* NUL byte, internal (null ps) state: expect 0 and c16 == 0. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* NUL byte, explicit zeroed state: same expectations. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Latin letter A, internal state (first call passes a null s). */
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'A');
+
+	/* Latin letter A, explicit zeroed state. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'A');
+
+	/* Zero-length input: expect (size_t)-2 and c16 left untouched. */
+	c16 = L'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'z');
+
+	/* Check that mbrtoc16() doesn't access the buffer when n == 0. */
+	c16 = L'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'z');
+
+	/* Check that mbrtoc16() doesn't read ahead too aggressively. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'A');
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'C');
+
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc)
+{
+	size_t n;
+	/* Exercise mbrtoc16() in an ISO 8859-1 locale. */
+	require_lc_ctype("en_US.ISO8859-1");
+
+	/* Byte 0xa4 is the currency sign here: expect code unit 0xa4. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test);
+ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc)
+{
+	size_t n;
+	/* Exercise mbrtoc16() in an ISO 8859-15 locale. */
+	require_lc_ctype("en_US.ISO8859-15");
+
+	/* Byte 0xa4 is the euro sign here: expect code unit 0x20ac. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test);
+ATF_TC_BODY(mbrtoc16_utf_8_test, tc)
+{
+	size_t n;
+	/* Exercise mbrtoc16() in a UTF-8 locale. */
+	require_lc_ctype("en_US.UTF-8");
+
+	/* NUL byte, internal state (first call passes a null s). */
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* NUL byte, explicit zeroed state: expect 0 and c16 == 0. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Latin letter A, internal state (first call passes a null s). */
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'A');
+
+	/* Latin letter A, explicit zeroed state. */
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'A');
+
+	/* Incomplete character sequence (zero length). */
+	c16 = L'z';
+	memset(&s, 0, sizeof(s));
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16,
+	    (uint16_t)c16, (uint16_t)L'z');
+
+	/* Incomplete character sequence (truncated double-byte). */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+	    "n=%zu", n);
+
+	/* Same as above, but complete. */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Restart: lead byte in one call, trail byte in the next. */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Surrogate pair: lead unit first, then (size_t)-3 for the trail. */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Letter e with acute, precomposed. */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16);
+
+	/* Letter e with acute, combined: two separate code units. */
+	memset(&s, 0, sizeof(s));
+	c16 = 0;
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16);
+	ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2,
+	    "n=%zu", n);
+	ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/* Register the mbrtoc16() test cases with the test program. */
+	ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test);
+	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test);
+	ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test);
+	ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test);
+
+	return (atf_no_error());
+}
Index: src/tests/lib/libc/locale/t_mbrtoc32.c
diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc32.c:1.1
--- /dev/null	Thu Aug 15 14:16:34 2024
+++ src/tests/lib/libc/locale/t_mbrtoc32.c	Thu Aug 15 14:16:34 2024
@@ -0,0 +1,61 @@
+/*	$NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $	*/
+
+/*-
+ * Copyright (c) 2024 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__RCSID("$NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $");
+
+#include <atf-c.h>
+#include <locale.h>
+#include <uchar.h>
+
+#include "h_macros.h"
+
+ATF_TC(mbrtoc32_null);
+ATF_TC_HEAD(mbrtoc32_null, tc)
+{
+	atf_tc_set_md_var(tc, "descr", "Test null string input to mbrtoc32");
+}
+ATF_TC_BODY(mbrtoc32_null, tc)
+{
+	char *locale;
+	char32_t c32;		/* output slot; never inspected by this test */
+	mbstate_t ps = {0};	/* zero-initialized conversion state */
+	size_t n;
+
+	REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL);
+	ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale);
+	/* With a null input string and n == 0, the call must return 0. */
+	ATF_CHECK_EQ_MSG((n = mbrtoc32(&c32, NULL, 0, &ps)), 0, "n=%zu", n);
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+	/* mbrtoc32_null is the only test case this program registers. */
+	ATF_TP_ADD_TC(tp, mbrtoc32_null);
+	return atf_no_error();
+}

Reply via email to