Module Name: src Committed By: riastradh Date: Thu Aug 15 14:16:34 UTC 2024
Modified Files: src/distrib/sets/lists/base: shl.mi src/distrib/sets/lists/comp: mi src/distrib/sets/lists/debug: mi src/distrib/sets/lists/tests: mi src/lib/libc: shlib_version src/lib/libc/locale: Makefile.inc src/share/man/man3: Makefile src/tests/lib/libc/locale: Makefile Added Files: src/lib/libc/locale: c16rtomb.3 c16rtomb.c c32rtomb.3 c32rtomb.c c32rtomb.h mbrtoc16.3 mbrtoc16.c mbrtoc32.3 mbrtoc32.c mbrtoc32.h src/share/man/man3: uchar.3 src/tests/lib/libc/locale: t_c16rtomb.c t_c32rtomb.c t_mbrtoc16.c t_mbrtoc32.c Log Message: libc: New C11 functions mbrtoc16, mbrtoc32, c16rtomb, c32rtomb. The mbrtoc16/32 functions read multibyte strings according to the current locale into UTF-16/32 code unit sequences; the c16/32rtomb functions write UTF-16/32 code unit sequences into multibyte strings according to the current locale. The `r' means restartable: they work incrementally and pick up where they left off. NOTE: This bumps the libc minor version, since it adds new symbols. PR lib/52374: <uchar.h> missing To generate a diff of this commit: cvs rdiff -u -r1.987 -r1.988 src/distrib/sets/lists/base/shl.mi cvs rdiff -u -r1.2468 -r1.2469 src/distrib/sets/lists/comp/mi cvs rdiff -u -r1.442 -r1.443 src/distrib/sets/lists/debug/mi cvs rdiff -u -r1.1330 -r1.1331 src/distrib/sets/lists/tests/mi cvs rdiff -u -r1.296 -r1.297 src/lib/libc/shlib_version cvs rdiff -u -r1.65 -r1.66 src/lib/libc/locale/Makefile.inc cvs rdiff -u -r0 -r1.1 src/lib/libc/locale/c16rtomb.3 \ src/lib/libc/locale/c16rtomb.c src/lib/libc/locale/c32rtomb.3 \ src/lib/libc/locale/c32rtomb.c src/lib/libc/locale/c32rtomb.h \ src/lib/libc/locale/mbrtoc16.3 src/lib/libc/locale/mbrtoc16.c \ src/lib/libc/locale/mbrtoc32.3 src/lib/libc/locale/mbrtoc32.c \ src/lib/libc/locale/mbrtoc32.h cvs rdiff -u -r1.92 -r1.93 src/share/man/man3/Makefile cvs rdiff -u -r0 -r1.1 src/share/man/man3/uchar.3 cvs rdiff -u -r1.16 -r1.17 src/tests/lib/libc/locale/Makefile cvs rdiff -u -r0 -r1.1 src/tests/lib/libc/locale/t_c16rtomb.c \ 
src/tests/lib/libc/locale/t_c32rtomb.c \ src/tests/lib/libc/locale/t_mbrtoc16.c \ src/tests/lib/libc/locale/t_mbrtoc32.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/distrib/sets/lists/base/shl.mi diff -u src/distrib/sets/lists/base/shl.mi:1.987 src/distrib/sets/lists/base/shl.mi:1.988 --- src/distrib/sets/lists/base/shl.mi:1.987 Fri Aug 2 17:25:38 2024 +++ src/distrib/sets/lists/base/shl.mi Thu Aug 15 14:16:32 2024 @@ -1,4 +1,4 @@ -# $NetBSD: shl.mi,v 1.987 2024/08/02 17:25:38 christos Exp $ +# $NetBSD: shl.mi,v 1.988 2024/08/15 14:16:32 riastradh Exp $ # # Note: Don't delete entries from here - mark them as "obsolete" instead, # unless otherwise stated below. @@ -22,7 +22,7 @@ ./lib/libblocklist.so.0.1 base-sys-shlib dynamicroot ./lib/libc.so base-sys-shlib dynamicroot ./lib/libc.so.12 base-sys-shlib dynamicroot -./lib/libc.so.12.221 base-sys-shlib dynamicroot +./lib/libc.so.12.222 base-sys-shlib dynamicroot ./lib/libcrypt.so base-sys-shlib dynamicroot ./lib/libcrypt.so.1 base-sys-shlib dynamicroot ./lib/libcrypt.so.1.0 base-sys-shlib dynamicroot @@ -257,7 +257,7 @@ ./usr/lib/libc++.so.1.0 base-sys-shlib compatfile,libcxx ./usr/lib/libc.so base-sys-shlib compatfile ./usr/lib/libc.so.12 base-sys-shlib compatfile -./usr/lib/libc.so.12.221 base-sys-shlib compatfile +./usr/lib/libc.so.12.222 base-sys-shlib compatfile ./usr/lib/libcbor.so base-sys-shlib compatfile ./usr/lib/libcbor.so.0 base-sys-shlib compatfile ./usr/lib/libcbor.so.0.5 base-sys-shlib compatfile Index: src/distrib/sets/lists/comp/mi diff -u src/distrib/sets/lists/comp/mi:1.2468 src/distrib/sets/lists/comp/mi:1.2469 --- src/distrib/sets/lists/comp/mi:1.2468 Thu Aug 15 13:14:43 2024 +++ src/distrib/sets/lists/comp/mi Thu Aug 15 14:16:32 2024 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.2468 2024/08/15 13:14:43 riastradh Exp $ +# $NetBSD: mi,v 1.2469 2024/08/15 14:16:32 riastradh Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. 
./etc/mtree/set.comp comp-sys-root @@ -6842,6 +6842,8 @@ ./usr/share/man/cat3/bufferevent_write_buffer.0 comp-obsolete obsolete ./usr/share/man/cat3/byteorder.0 comp-c-catman .cat ./usr/share/man/cat3/bzero.0 comp-c-catman .cat +./usr/share/man/cat3/c16rtomb.0 comp-c-catman .cat +./usr/share/man/cat3/c32rtomb.0 comp-c-catman .cat ./usr/share/man/cat3/cabs.0 comp-c-catman complex,.cat ./usr/share/man/cat3/cabsf.0 comp-c-catman complex,.cat ./usr/share/man/cat3/cabsl.0 comp-c-catman complex,.cat @@ -9204,6 +9206,8 @@ ./usr/share/man/cat3/math.0 comp-c-catman .cat ./usr/share/man/cat3/mblen.0 comp-c-catman .cat ./usr/share/man/cat3/mbrlen.0 comp-c-catman .cat +./usr/share/man/cat3/mbrtoc16.0 comp-c-catman .cat +./usr/share/man/cat3/mbrtoc32.0 comp-c-catman .cat ./usr/share/man/cat3/mbrtowc.0 comp-c-catman .cat ./usr/share/man/cat3/mbsinit.0 comp-c-catman .cat ./usr/share/man/cat3/mbsrtowcs.0 comp-c-catman .cat @@ -11148,6 +11152,7 @@ ./usr/share/man/cat3/tzset.0 comp-c-catman .cat ./usr/share/man/cat3/tzsetwall.0 comp-c-catman .cat ./usr/share/man/cat3/ualarm.0 comp-c-catman .cat +./usr/share/man/cat3/uchar.0 comp-c-catman .cat ./usr/share/man/cat3/uid_from_user.0 comp-c-catman .cat ./usr/share/man/cat3/ukfs.0 comp-c-catman .cat,rump ./usr/share/man/cat3/ulimit.0 comp-c-catman .cat @@ -15402,6 +15407,8 @@ ./usr/share/man/html3/bufferevent_write_buffer.html comp-obsolete obsolete ./usr/share/man/html3/byteorder.html comp-c-htmlman html ./usr/share/man/html3/bzero.html comp-c-htmlman html +./usr/share/man/html3/c16rtomb.html comp-c-htmlman html +./usr/share/man/html3/c32rtomb.html comp-c-htmlman html ./usr/share/man/html3/cabs.html comp-c-htmlman complex,html ./usr/share/man/html3/cabsf.html comp-c-htmlman complex,html ./usr/share/man/html3/cabsl.html comp-c-htmlman complex,html @@ -17696,6 +17703,8 @@ ./usr/share/man/html3/math.html comp-c-htmlman html ./usr/share/man/html3/mblen.html comp-c-htmlman html ./usr/share/man/html3/mbrlen.html comp-c-htmlman html 
+./usr/share/man/html3/mbrtoc16.html comp-c-htmlman html +./usr/share/man/html3/mbrtoc32.html comp-c-htmlman html ./usr/share/man/html3/mbrtowc.html comp-c-htmlman html ./usr/share/man/html3/mbsinit.html comp-c-htmlman html ./usr/share/man/html3/mbsrtowcs.html comp-c-htmlman html @@ -19621,6 +19630,7 @@ ./usr/share/man/html3/tzset.html comp-c-htmlman html ./usr/share/man/html3/tzsetwall.html comp-c-htmlman html ./usr/share/man/html3/ualarm.html comp-c-htmlman html +./usr/share/man/html3/uchar.html comp-c-htmlman html ./usr/share/man/html3/uid_from_user.html comp-c-htmlman html ./usr/share/man/html3/ukfs.html comp-c-htmlman html,rump ./usr/share/man/html3/ulimit.html comp-c-htmlman html @@ -23821,6 +23831,8 @@ ./usr/share/man/man3/bufferevent_write_buffer.3 comp-obsolete obsolete ./usr/share/man/man3/byteorder.3 comp-c-man .man ./usr/share/man/man3/bzero.3 comp-c-man .man +./usr/share/man/man3/c16rtomb.3 comp-c-man .man +./usr/share/man/man3/c32rtomb.3 comp-c-man .man ./usr/share/man/man3/cabs.3 comp-c-man complex,.man ./usr/share/man/man3/cabsf.3 comp-c-man complex,.man ./usr/share/man/man3/cabsl.3 comp-c-man complex,.man @@ -26195,6 +26207,8 @@ ./usr/share/man/man3/math.3 comp-c-man .man ./usr/share/man/man3/mblen.3 comp-c-man .man ./usr/share/man/man3/mbrlen.3 comp-c-man .man +./usr/share/man/man3/mbrtoc16.3 comp-c-man .man +./usr/share/man/man3/mbrtoc32.3 comp-c-man .man ./usr/share/man/man3/mbrtowc.3 comp-c-man .man ./usr/share/man/man3/mbsinit.3 comp-c-man .man ./usr/share/man/man3/mbsrtowcs.3 comp-c-man .man @@ -28158,6 +28172,7 @@ ./usr/share/man/man3/tzset.3 comp-c-man .man ./usr/share/man/man3/tzsetwall.3 comp-c-man .man ./usr/share/man/man3/ualarm.3 comp-c-man .man +./usr/share/man/man3/uchar.3 comp-c-man .man ./usr/share/man/man3/uid_from_user.3 comp-c-man .man ./usr/share/man/man3/ukfs.3 comp-c-man .man,rump ./usr/share/man/man3/ulimit.3 comp-c-man .man Index: src/distrib/sets/lists/debug/mi diff -u src/distrib/sets/lists/debug/mi:1.442 
src/distrib/sets/lists/debug/mi:1.443 --- src/distrib/sets/lists/debug/mi:1.442 Thu Aug 15 13:14:44 2024 +++ src/distrib/sets/lists/debug/mi Thu Aug 15 14:16:32 2024 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.442 2024/08/15 13:14:44 riastradh Exp $ +# $NetBSD: mi,v 1.443 2024/08/15 14:16:32 riastradh Exp $ # ./etc/mtree/set.debug comp-sys-root ./usr/lib comp-sys-usr compatdir @@ -2058,11 +2058,15 @@ ./usr/libdata/debug/usr/tests/lib/libc/inet/t_inet_network.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/kevent_nullmnt/h_nullmnt.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_btowc.debug tests-lib-debug debug,atf,compattestfile +./usr/libdata/debug/usr/tests/lib/libc/locale/t_c16rtomb.debug tests-lib-debug debug,atf,compattestfile +./usr/libdata/debug/usr/tests/lib/libc/locale/t_c32rtomb.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype1.debug tests-obsolete obsolete,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ctype2.debug tests-obsolete obsolete,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_digittoint.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_ducet.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_io.debug tests-lib-debug debug,atf,compattestfile +./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc16.debug tests-lib-debug debug,atf,compattestfile +./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtoc32.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbrtowc.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbsnrtowcs.debug tests-lib-debug debug,atf,compattestfile ./usr/libdata/debug/usr/tests/lib/libc/locale/t_mbstowcs.debug tests-lib-debug debug,atf,compattestfile Index: src/distrib/sets/lists/tests/mi diff -u 
src/distrib/sets/lists/tests/mi:1.1330 src/distrib/sets/lists/tests/mi:1.1331 --- src/distrib/sets/lists/tests/mi:1.1330 Thu Aug 15 13:14:44 2024 +++ src/distrib/sets/lists/tests/mi Thu Aug 15 14:16:33 2024 @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.1330 2024/08/15 13:14:44 riastradh Exp $ +# $NetBSD: mi,v 1.1331 2024/08/15 14:16:33 riastradh Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. # @@ -3073,11 +3073,15 @@ ./usr/tests/lib/libc/locale/Atffile tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/Kyuafile tests-lib-tests compattestfile,atf,kyua ./usr/tests/lib/libc/locale/t_btowc tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/locale/t_c16rtomb tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/locale/t_c32rtomb tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_ctype1 tests-obsolete obsolete ./usr/tests/lib/libc/locale/t_ctype2 tests-obsolete obsolete ./usr/tests/lib/libc/locale/t_digittoint tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_ducet tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_io tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/locale/t_mbrtoc16 tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/locale/t_mbrtoc32 tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_mbrtowc tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_mbsnrtowcs tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/locale/t_mbstowcs tests-lib-tests compattestfile,atf Index: src/lib/libc/shlib_version diff -u src/lib/libc/shlib_version:1.296 src/lib/libc/shlib_version:1.297 --- src/lib/libc/shlib_version:1.296 Fri Feb 2 21:52:22 2024 +++ src/lib/libc/shlib_version Thu Aug 15 14:16:33 2024 @@ -1,4 +1,4 @@ -# $NetBSD: shlib_version,v 1.296 2024/02/02 21:52:22 andvar Exp $ +# $NetBSD: shlib_version,v 1.297 2024/08/15 14:16:33 riastradh Exp $ # Remember to update distrib/sets/lists/base/shl.* when changing # # things we wish to do on next 
major version bump: @@ -55,4 +55,4 @@ # - remove tzsetwall(3), upstream has removed it # - move *rand48* to libcompat major=12 -minor=221 +minor=222 Index: src/lib/libc/locale/Makefile.inc diff -u src/lib/libc/locale/Makefile.inc:1.65 src/lib/libc/locale/Makefile.inc:1.66 --- src/lib/libc/locale/Makefile.inc:1.65 Mon Feb 15 14:35:04 2021 +++ src/lib/libc/locale/Makefile.inc Thu Aug 15 14:16:33 2024 @@ -1,5 +1,5 @@ # from: @(#)Makefile.inc 5.1 (Berkeley) 2/18/91 -# $NetBSD: Makefile.inc,v 1.65 2021/02/15 14:35:04 christos Exp $ +# $NetBSD: Makefile.inc,v 1.66 2024/08/15 14:16:33 riastradh Exp $ # locale sources .PATH: ${ARCHDIR}/locale ${.CURDIR}/locale @@ -11,6 +11,13 @@ SRCS+= setlocale.c __mb_cur_max.c \ wcstol.c wcstoll.c wcstoimax.c wcstoul.c wcstoull.c wcstoumax.c \ wcstod.c wcstof.c wcstold.c wcscoll.c wcsxfrm.c wcsftime.c +SRCS+= c16rtomb.c +SRCS+= c32rtomb.c +SRCS+= mbrtoc16.c +SRCS+= mbrtoc32.c +CPPFLAGS.c32rtomb.c+= -I${LIBCDIR}/citrus +CPPFLAGS.mbrtoc32.c+= -I${LIBCDIR}/citrus + # citrus multibyte locale support # we have quirk for libc.a - see the last part of lib/libc/Makefile CPPFLAGS+= -DWITH_RUNE -I${.CURDIR} @@ -29,6 +36,11 @@ MAN+= btowc.3 mbrtowc.3 mbsrtowcs.3 \ wctob.3 wcrtomb.3 wcsrtombs.3 \ mbrlen.3 mbsinit.3 +MAN+= c16rtomb.3 +MAN+= c32rtomb.3 +MAN+= mbrtoc16.3 +MAN+= mbrtoc32.3 + MAN+= iswalnum.3 wctype.3 iswctype.3 \ towlower.3 wctrans.3 towctrans.3 \ wcwidth.3 Index: src/share/man/man3/Makefile diff -u src/share/man/man3/Makefile:1.92 src/share/man/man3/Makefile:1.93 --- src/share/man/man3/Makefile:1.92 Thu Mar 7 22:14:20 2024 +++ src/share/man/man3/Makefile Thu Aug 15 14:16:34 2024 @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.92 2024/03/07 22:14:20 christos Exp $ +# $NetBSD: Makefile,v 1.93 2024/08/15 14:16:34 riastradh Exp $ # @(#)Makefile 8.2 (Berkeley) 12/13/93 MAN= _DIAGASSERT.3 __CONCAT.3 __FPTRCAST.3 __UNCONST.3 __USE.3 CMSG_DATA.3 \ @@ -13,6 +13,7 @@ MAN= _DIAGASSERT.3 __CONCAT.3 __FPTRCAST makedev.3 offsetof.3 param.3 paths.3 queue.3 
rbtree.3 sigevent.3 \ stdarg.3 stdbool.3 stddef.3 stdint.3 stdlib.3 sysexits.3 \ tgmath.3 timeradd.3 timeval.3 tm.3 tree.3 typeof.3 types.3 \ + uchar.3 \ unistd.3 USETBL= # used by queue.3 Index: src/tests/lib/libc/locale/Makefile diff -u src/tests/lib/libc/locale/Makefile:1.16 src/tests/lib/libc/locale/Makefile:1.17 --- src/tests/lib/libc/locale/Makefile:1.16 Thu Aug 15 13:14:44 2024 +++ src/tests/lib/libc/locale/Makefile Thu Aug 15 14:16:34 2024 @@ -1,13 +1,17 @@ -# $NetBSD: Makefile,v 1.16 2024/08/15 13:14:44 riastradh Exp $ +# $NetBSD: Makefile,v 1.17 2024/08/15 14:16:34 riastradh Exp $ .include <bsd.own.mk> TESTSDIR= ${TESTSBASE}/lib/libc/locale TESTS_C+= t_btowc +TESTS_C+= t_c16rtomb +TESTS_C+= t_c32rtomb TESTS_C+= t_digittoint TESTS_C+= t_ducet TESTS_C+= t_io +TESTS_C+= t_mbrtoc16 +TESTS_C+= t_mbrtoc32 TESTS_C+= t_mbrtowc TESTS_C+= t_mbsnrtowcs TESTS_C+= t_mbstowcs Added files: Index: src/lib/libc/locale/c16rtomb.3 diff -u /dev/null src/lib/libc/locale/c16rtomb.3:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/c16rtomb.3 Thu Aug 15 14:16:33 2024 @@ -0,0 +1,199 @@ +.\" $NetBSD: c16rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd August 14, 2024 +.Dt C16RTOMB 3 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm c16rtomb +.Nd Restartable UTF-16 code unit to multibyte conversion +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh LIBRARY +.Lb libc +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fn c16rtomb "char * restrict s" \ +"char16_t c16" \ +"mbstate_t * restrict ps" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.Nm +function attempts to encode Unicode input as a multibyte character +sequence output at +.Fa s +in the current locale, writing anywhere between zero and +.Dv MB_CUR_MAX +bytes, inclusive, to +.Fa s , +depending on the inputs and conversion state +.Fa ps . +.Pp +The input +.Fa c16 +is a UTF-16 code unit, which can be either: +.Bl -bullet +.It +a Unicode scalar value in the Basic Multilingual Plane (BMP), that is, +a 16-bit code unit outside the interval [0xd800,0xdfff]; or, +.It +over the course of two consecutive calls to +.Nm , +the high and low surrogate code points of a Unicode scalar value +outside the BMP. 
+.El +.Pp +If a low surrogate code point, that is, a value of +.Fa c16 +in [0xdc00,0xdfff], is passed to +.Nm +without the preceding call to it with the same +.Fa ps +having been passed a high surrogate code point, that is, a value of +.Fa c16 +in [0xd800,0xdbff], or if a high surrogate was passed in the previous +call and anything other than a low surrogate is passed, then +.Nm +will return +.Li (size_t)-1 +to denote failure with +.Xr errno 2 +set to +.Er EILSEQ . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh RETURN VALUES +The +.Nm +function returns the number of bytes written to +.Fa s +on success, or sets +.Xr errno 2 +and returns +.Li "(size_t)-1" +on failure. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh EXAMPLES +Convert a UTF-16 code unit sequence to a multibyte string, +NUL-terminate it, and print it: +.Bd -literal -offset indent +char16_t c16[] = { 0xd83d, 0xdca9 }; +char buf[__arraycount(c16)*MB_CUR_MAX + 1], *s = buf; +size_t i; +mbstate_t mbs = {0}; /* initial conversion state */ + +for (i = 0; i < __arraycount(c16); i++) { + size_t len; + + len = c16rtomb(s, c16[i], &mbs); + if (len == (size_t)-1) + err(1, "c16rtomb"); + assert(len <= sizeof(buf) - (s - buf)); + s += len; +} +*s = '\e0'; /* NUL-terminate */ +printf("%s\en", buf); +.Ed +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh ERRORS +.Bl -tag -width ".Bq Er EILSEQ" +.It Bq Er EILSEQ +A surrogate code point was passed as +.Fa c16 +when it is inappropriate. +.It Bq Er EILSEQ +The Unicode scalar value requested cannot be encoded as a multibyte +sequence in the current locale. +.It Bq Er EIO +An error occurred in loading the locale's character conversions. 
+.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr c32rtomb 3 , +.Xr mbrtoc16 3 , +.Xr mbrtoc32 3 , +.Xr uchar 3 +.Rs +.%B The Unicode Standard +.%O Version 15.0 \(em Core Specification +.%Q The Unicode Consortium +.%D September 2022 +.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf +.Re +.Rs +.%A P. Hoffman +.%A F. Yergeau +.%T UTF-16, an encoding of ISO 10646 +.%R RFC 2781 +.%D February 2000 +.%I Internet Engineering Task Force +.%U https://datatracker.ietf.org/doc/html/rfc2781 +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh STANDARDS +The +.Nm +function conforms to +.St -isoC-2011 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +function first appeared in +.Nx 11.0 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh BUGS +It is not clear from the standard how +.Nm +is supposed to behave when given a high surrogate code point followed +by a NUL: +.Bd -literal -offset indent +c16rtomb(s, 0xd800, ps); +c16rtomb(s, L'\e0', ps); +.Ed +.Pp +Currently this fails with +.Er EILSEQ +which matches other implementations, but this is at odds with language +in the standard which suggests that passing +.Li L'\e0' +should unconditionally store a null byte and reset +.Fa ps +to the initial conversion state: +.Bd -offset indent +If +.Fa c16 +is a null wide character, a null byte is stored, preceded by any shift +sequence needed to restore the initial shift state; the resulting state +described is the initial conversion state. +.Ed +.Pp +However, it is unclear what else this should store besides a null +byte. +Should it discard the pending high surrogate, or convert it to +something else and store that? 
Index: src/lib/libc/locale/c16rtomb.c diff -u /dev/null src/lib/libc/locale/c16rtomb.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/c16rtomb.c Thu Aug 15 14:16:33 2024 @@ -0,0 +1,181 @@ +/* $NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * c16rtomb(s, c16, ps) + * + * Encode the Unicode UTF-16 code unit c16, which may be a surrogate + * code point, into the multibyte buffer s under the current + * locale, using multibyte encoding state ps. 
+ * + * If c16 is a high surrogate, no output will be produced, but c16 + * will be remembered; this must be followed by another call + * passing the trailing low surrogate. + * + * If c16 is a low surrogate, it must have been preceded by a call + * with the leading high surrogate; at this point the combined + * scalar value will be produced as output. + * + * Return the number of bytes stored on success, or (size_t)-1 on + * error with errno set to EILSEQ. + * + * At most MB_CUR_MAX bytes will be stored. + * + * References: + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119. + * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144 + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144 + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16, + * p. 124. + * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150 + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150 + * + * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646', + * RFC 2781, Internet Engineering Task Force, February 2000, + * Sec. 2.2: `Decoding UTF-16'. 
+ * https://datatracker.ietf.org/doc/html/rfc2781#section-2.2 + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: c16rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $"); + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <uchar.h> + +#include "c32rtomb.h" + +struct c16rtombstate { + char16_t surrogate; + mbstate_t mbs; +}; +__CTASSERT(offsetof(struct c16rtombstate, mbs) <= sizeof(mbstate_t)); +__CTASSERT(sizeof(struct c32rtombstate) <= sizeof(mbstate_t) - + offsetof(struct c16rtombstate, mbs)); +__CTASSERT(_Alignof(struct c16rtombstate) <= _Alignof(mbstate_t)); + +size_t +c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) +{ + static mbstate_t psbuf; + char buf[MB_LEN_MAX]; + struct c16rtombstate *S; + char32_t c32; + + /* + * `If ps is a null pointer, each function uses its own + * internal mbstate_t object instead, which is initialized at + * program startup to the initial conversion state; the + * functions are not required to avoid data races with other + * calls to the same function in this case. The + * implementation behaves as if no library function calls + * these functions with a null pointer for ps.' + */ + if (ps == NULL) + ps = &psbuf; + + /* + * `If s is a null pointer, the c16rtomb function is equivalent + * to the call + * + * c16rtomb(buf, L'\0', ps) + * + * where buf is an internal buffer.' + */ + if (s == NULL) { + s = buf; + c16 = L'\0'; + } + + /* + * Open the private UTF-16 decoding state. + */ + S = (struct c16rtombstate *)ps; + +#if 0 + /* + * `If c16 is a null wide character, a null byte is stored, + * preceded by any shift sequence needed to restore the + * initial shift state; the resulting state described is the + * initial conversion state.' + * + * XXX But what else gets stored? Do we just discard any + * pending high surrogate, or do we convert it to something + * else, or what? + */ + if (c16 == L'\0') { + S->surrogate = 0; + } +#endif + + /* + * Check whether: + * + * 1. 
We had previously decoded a high surrogate. + * => Decode the low surrogate -- reject if it's not a low + * surrogate -- and combine them to output a scalar + * value; clear the high surrogate for next time. + * 2. This is a high surrogate. + * => Save it and wait for the low surrogate with no output. + * 3. This is a low surrogate. + * => Reject. + * 4. This is not a surrogate. + * => Output a scalar value. + */ + if (S->surrogate != 0) { /* 1. pending surrogate pair */ + if (c16 < 0xdc00 || c16 > 0xdfff) { + errno = EILSEQ; + return (size_t)-1; + } + const char16_t w1 = S->surrogate; + const char16_t w2 = c16; + c32 = __SHIFTIN(__SHIFTOUT(w1, __BITS(9,0)), __BITS(19,10)) | + __SHIFTIN(__SHIFTOUT(w2, __BITS(9,0)), __BITS(9,0)); + c32 += 0x10000; + S->surrogate = 0; + } else if (c16 >= 0xd800 && c16 <= 0xdbff) { /* 2. high surrogate */ + S->surrogate = c16; + return 0; /* produced nothing */ + } else if (c16 >= 0xdc00 && c16 <= 0xdfff) { /* 3. low surrogate */ + errno = EILSEQ; + return (size_t)-1; + } else { /* 4. not a surrogate */ + c32 = c16; + } + + /* + * We have a scalar value. Output it. + */ + return c32rtomb(s, c32, &S->mbs); +} Index: src/lib/libc/locale/c32rtomb.3 diff -u /dev/null src/lib/libc/locale/c32rtomb.3:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/c32rtomb.3 Thu Aug 15 14:16:33 2024 @@ -0,0 +1,141 @@ +.\" $NetBSD: c32rtomb.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd August 14, 2024 +.Dt C32RTOMB 3 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm c32rtomb +.Nd Restartable UTF-32 code unit to multibyte conversion +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh LIBRARY +.Lb libc +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fn c32rtomb "char * restrict s" \ +"char32_t c32" \ +"mbstate_t * restrict ps" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.Nm +function attempts to encode Unicode input as a multibyte character +sequence output at +.Fa s +in the current locale, writing anywhere between zero and +.Dv MB_CUR_MAX +bytes, inclusive, to +.Fa s , +depending on the inputs and conversion state +.Fa ps . 
+.Pp +The input +.Fa c32 +is a UTF-32 code unit, which represents a single Unicode scalar value, +i.e., a Unicode code point that is not in the interval [0xd800,0xdfff] +of surrogate code points. +.Pp +If a surrogate code point is passed, +.Nm +will return +.Li (size_t)-1 +to denote failure with +.Xr errno 2 +set to +.Er EILSEQ . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh RETURN VALUES +The +.Nm +function returns the number of bytes written to +.Fa s +on success, or sets +.Xr errno 2 +and returns +.Li "(size_t)-1" +on failure. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh EXAMPLES +Convert a sequence of Unicode scalar values to a multibyte sequence, +NUL-terminate it, and print it: +.Bd -literal -offset indent +char32_t c32[] = { 0x1f4a9, 0x20ac, 0x21 }; +char buf[__arraycount(c32)*MB_CUR_MAX + 1], *s = buf; +size_t i; +mbstate_t mbs = {0}; /* initial conversion state */ + +for (i = 0; i < __arraycount(c32); i++) { + size_t len; + + len = c32rtomb(s, c32[i], &mbs); + if (len == (size_t)-1) + err(1, "c32rtomb"); + assert(len <= sizeof(buf) - (s - buf)); + s += len; +} +*s = '\e0'; /* NUL-terminate */ +printf("%s\en", buf); +.Ed +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh ERRORS +.Bl -tag -width ".Bq Er EILSEQ" +.It Bq Er EILSEQ +A surrogate code point was passed as +.Fa c32 . +.It Bq Er EILSEQ +The Unicode scalar value requested cannot be encoded as a multibyte +sequence in the current locale. +.It Bq Er EIO +An error occurred in loading the locale's character conversions. 
+.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr c16rtomb 3 , +.Xr mbrtoc16 3 , +.Xr mbrtoc32 3 , +.Xr uchar 3 +.Rs +.%B The Unicode Standard +.%O Version 15.0 \(em Core Specification +.%Q The Unicode Consortium +.%D September 2022 +.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh STANDARDS +The +.Nm +function conforms to +.St -isoC-2011 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +function first appeared in +.Nx 11.0 . Index: src/lib/libc/locale/c32rtomb.c diff -u /dev/null src/lib/libc/locale/c32rtomb.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/c32rtomb.c Thu Aug 15 14:16:33 2024 @@ -0,0 +1,162 @@ +/* $NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * c32rtomb(s, c32, ps) + * + * Encode the Unicode UTF-32 code unit c32, which must not be a + * surrogate code point, into the multibyte buffer s under the + * current locale, using multibyte encoding state ps. A UTF-32 + * code unit is also a Unicode scalar value, which is any Unicode + * code point except a surrogate. + * + * Return the number of bytes stored on success, or (size_t)-1 on + * error with errno set to EILSEQ. + * + * At most MB_CUR_MAX bytes will be stored. + * + * References: + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119. 
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: c32rtomb.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $"); + +#include <sys/types.h> /* broken citrus_*.h */ +#include <sys/queue.h> /* broken citrus_*.h */ + +#include <assert.h> +#include <errno.h> +#include <langinfo.h> +#include <limits.h> +#include <paths.h> +#include <stddef.h> +#include <stdlib.h> +#include <uchar.h> +#include <wchar.h> + +#include "citrus_types.h" /* broken citrus_iconv.h */ +#include "citrus_module.h" /* broken citrus_iconv.h */ +#include "citrus_hash.h" /* broken citrus_iconv.h */ +#include "citrus_iconv.h" + +size_t +c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps) +{ + char buf[MB_LEN_MAX]; /* scratch output, used only when s is NULL */ + struct _citrus_iconv *iconv = NULL; + char srcbuf[4]; + const char *src; + char *dst; + size_t srcleft, dstleft, inval, len; + int error, errno_save; + + /* + * Save errno in case _citrus_iconv_* clobbers it. It is restored on the success path. + */ + errno_save = errno; + + /* + * `If s is a null pointer, the c32rtomb function is equivalent + * to the call + * + * c32rtomb(buf, L'\0', ps) + * + * where buf is an internal buffer.' + */ + if (s == NULL) { + s = buf; + c32 = L'\0'; + } + + /* + * Reject surrogates: fail with EILSEQ before any iconv handle is opened. + */ + if (c32 >= 0xd800 && c32 <= 0xdfff) { + errno = EILSEQ; + len = (size_t)-1; + goto out; + } + + /* + * Open an iconv handle to convert UTF-32LE to locale-dependent + * multibyte output. + */ + if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV, "utf-32le", + nl_langinfo(CODESET))) != 0) { + errno = EIO; /* XXX? the value in error is not propagated */ + len = (size_t)-1; + goto out; + } + + /* + * Convert from UTF-32LE in our buffer.
+ */ + le32enc(srcbuf, c32); + src = srcbuf; + srcleft = sizeof(srcbuf); + dst = s; + dstleft = MB_CUR_MAX; /* output budget: at most MB_CUR_MAX bytes at s */ + error = _citrus_iconv_convert(iconv, + &src, &srcleft, + &dst, &dstleft, + _CITRUS_ICONV_F_HIDE_INVALID, &inval); + if (error) { /* can't be incomplete, must be error */ + errno = error; + len = (size_t)-1; + goto out; + } + _DIAGASSERT(srcleft == 0); + _DIAGASSERT(dstleft <= MB_CUR_MAX); + + /* + * If we didn't produce any output, that means the scalar value + * c32 can't be encoded in the current locale, so treat it as + * EILSEQ. + */ + len = MB_CUR_MAX - dstleft; + if (len == 0) { + errno = EILSEQ; + len = (size_t)-1; + goto out; + } + + /* + * Make sure we preserve errno on success. + */ + errno = errno_save; + +out: errno_save = errno; /* shield errno from the close that follows */ + _citrus_iconv_close(iconv); /* iconv is still NULL on the surrogate-reject path */ + errno = errno_save; + return len; +} Index: src/lib/libc/locale/c32rtomb.h diff -u /dev/null src/lib/libc/locale/c32rtomb.h:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/c32rtomb.h Thu Aug 15 14:16:33 2024 @@ -0,0 +1,36 @@ +/* $NetBSD: c32rtomb.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIB_LIBC_LOCALE_C32RTOMB_H_ +#define LIB_LIBC_LOCALE_C32RTOMB_H_ + +struct c32rtombstate { + char dummy; /* only member, a placeholder; NOTE(review): presumably reserved for future conversion state -- confirm */ +}; + +#endif /* LIB_LIBC_LOCALE_C32RTOMB_H_ */ Index: src/lib/libc/locale/mbrtoc16.3 diff -u /dev/null src/lib/libc/locale/mbrtoc16.3:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/mbrtoc16.3 Thu Aug 15 14:16:33 2024 @@ -0,0 +1,304 @@ +.\" $NetBSD: mbrtoc16.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd August 14, 2024 +.Dt MBRTOC16 3 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm mbrtoc16 +.Nd Restartable multibyte to UTF-16 code unit conversion +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh LIBRARY +.Lb libc +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fn mbrtoc16 "char16_t * restrict pc16" \ +"const char * restrict s" \ +"size_t n" \ +"mbstate_t * restrict ps" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.Nm +function attempts to decode a multibyte character sequence at +.Fa s +of up to +.Fa n +bytes in the current locale, and yield the content as UTF-16 code +units via the output parameter +.Fa pc16 . +.Fa pc16 +may be null, in which case no output is stored. +.Bl -bullet +.It +If the multibyte sequence at +.Fa s +is invalid or an error occurs in decoding, +.Nm +returns +.Li (size_t)-1 +and sets +.Xr errno 2 +to indicate the error. +.It +If the multibyte sequence at +.Fa s +is still incomplete after +.Fa n +bytes, including any previously processed input saved in +.Fa ps , +.Nm +saves its state in +.Fa ps +after all the input so far and returns +.Li "(size_t)-2". +.It +If +.Nm +finds the null scalar value at +.Fa s , +then it stores zero at +.Li * Ns Fa pc16 +and returns zero. 
+.It +If +.Nm +finds a nonnull scalar value in the Basic Multilingual Plane, i.e., a +16-bit scalar value, then it stores the scalar value at +.Li * Ns Fa pc16 , +and returns the number of bytes it read from the input. +.It +If +.Nm +finds a scalar value outside the Basic Multilingual Plane (BMP), then +it: +.Bl -dash -compact +.It +stores the scalar value's high surrogate code point at +.Li * Ns Fa pc16 ; +.It +stores conversion state in +.Fa ps +to remember the rest of the pending scalar value; and +.It +returns the number of bytes it read from the input. +.El +.It +If +.Nm +had previously found a scalar value outside the BMP, then, instead of +any of the above options, it: +.Bl -dash -compact +.It +stores the scalar value's low surrogate code point at +.Li * Ns Fa pc16 ; +.It +consumes rest of the pending scalar value from the conversion state +.Fa ps ; +and +.It +returns +.Li (size_t)-3 +to indicate that no bytes were consumed but a code unit was yielded +nevertheless. +.El +.El +.Pp +If +.Fa s +is a null pointer, the +.Nm +call is equivalent to: +.Bd -ragged -offset indent +.Fo mbrtoc16 +.Li NULL , +.Li \*q\*q , +.Li 1 , +.Fa ps +.Fc +.Ed +.Pp +This always returns zero, and has the effect of resetting +.Fa ps +to the initial conversion state, without writing to +.Fa pc16 , +even if it is nonnull. +.Pp +If +.Fa ps +is a null pointer, +.Nm +uses an internal +.Vt mbstate_t +object with static storage duration, distinct from all other +.Vt mbstate_t +objects (including those used by +.Xr mbrtoc32 3 , +.Xr c16rtomb 3 , +and +.Xr c32rtomb 3 ) , +which is initialized at program startup to the initial conversion +state. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh RETURN VALUES +The +.Nm +function returns: +.Bl -tag -width ".Li (size_t)-3" -offset indent +.It Li 0 +[null] +if within the next +.Fa n +bytes at +.Fa s +the first multibyte character is null. 
+.It Fa i +[code unit] +where +.Li 0 +\*(Le +.Fa i +\*(Le +.Fa n , +if either +.Fa ps +is in the initial conversion state or the previous call to +.Nm +with +.Fa ps +had not yielded a surrogate code point, and within the first +.Fa i +bytes at +.Fa s +a Unicode scalar value was decoded. +.It Li (size_t)-3 +[continuation] +if the previous call to +.Nm +with +.Fa ps +had yielded a high surrogate code point for a Unicode scalar value +outside the Basic Multilingual Plane; no additional input is consumed +in this case. +.It Li (size_t)-2 +[incomplete] +if either +.Fa ps +is in the initial conversion state or the previous call to +.Nm +with +.Fa ps +had not yielded a surrogate code point, and within the first +.Fa n +bytes at +.Fa s , +including any previously buffered input, no complete Unicode scalar +value could be decoded. +.It Li (size_t)-1 +[error] +if any encoding error was detected; +.Xr errno 2 +is set to reflect the error. +.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh EXAMPLES +Print the UTF-16 code units of a multibyte string in hexadecimal text: +.Bd -literal -offset indent +char *s = ...; +size_t n = ...; +mbstate_t mbs = {0}; /* initial conversion state */ + +while (n) { + char16_t c16; + size_t len; + + len = mbrtoc16(&c16, s, n, &mbs); + switch (len) { + case 0: /* null terminator */ + assert(c16 == L'\e0'); + goto out; + default: /* scalar value or high surrogate */ + printf("U+%04"PRIx16"\en", (uint16_t)c16); + break; + case (size_t)-3: /* low surrogate */ + printf("continue U+%04"PRIx16"\en", (uint16_t)c16); + continue; /* no input consumed */ + case (size_t)-2: /* incomplete */ + printf("incomplete\en"); + goto readmore; + case (size_t)-1: /* error */ + printf("error: %d\en", errno); + goto out; + } + s += len; + n -= len; +} +.Ed +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh ERRORS +.Bl -tag -width ".Bq Er EILSEQ" +.It Bq Er EILSEQ +The multibyte sequence cannot be decoded as a Unicode scalar
value. +.It Bq Er EIO +An error occurred in loading the locale's character conversions. +.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr c16rtomb 3 , +.Xr c32rtomb 3 , +.Xr mbrtoc32 3 , +.Xr uchar 3 +.Rs +.%B The Unicode Standard +.%O Version 15.0 \(em Core Specification +.%Q The Unicode Consortium +.%D September 2022 +.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf +.Re +.Rs +.%A P. Hoffman +.%A F. Yergeau +.%T UTF-16, an encoding of ISO 10646 +.%R RFC 2781 +.%D February 2000 +.%I Internet Engineering Task Force +.%U https://datatracker.ietf.org/doc/html/rfc2781 +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh STANDARDS +The +.Nm +function conforms to +.St -isoC-2011 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +function first appeared in +.Nx 11.0 . Index: src/lib/libc/locale/mbrtoc16.c diff -u /dev/null src/lib/libc/locale/mbrtoc16.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/mbrtoc16.c Thu Aug 15 14:16:33 2024 @@ -0,0 +1,192 @@ +/* $NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * mbrtoc16(&c16, s, n, ps) + * + * Decode a Unicode scalar value from up to n bytes out of the + * multibyte string s, using multibyte encoding state ps, and + * store the next code unit in the UTF-16 representation of that + * scalar value at c16. + * + * If the next scalar value in s is outside the Basic Multilingual + * Plane, mbrtoc16 will yield the high surrogate code point in one + * call that consumes input, and will yield the low surrogate code + * point in the next call without consuming any input and + * returning (size_t)-3 instead. + * + * Return the number of bytes consumed on success, or: + * + * - 0 if the code unit is NUL, or + * - (size_t)-3 if the trailing low surrogate of a surrogate pair + * was returned without consuming any additional input, or + * - (size_t)-2 if the input is incomplete, or + * - (size_t)-1 on error with errno set to EILSEQ. + * + * In the case of incomplete input, the decoding state so far + * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so + * subsequent calls to mbrtoc16 will pick up n bytes later into + * the input stream. + * + * References: + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119. 
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144 + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=144 + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.9 `Unicode Encoding Forms': UTF-16, + * p. 124. + * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150 + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#page=150 + * + * P. Hoffman and F. Yergeau, `UTF-16, an encoding of ISO 10646', + * RFC 2781, Internet Engineering Task Force, February 2000, + * Sec. 2.1: `Encoding UTF-16'. + * https://datatracker.ietf.org/doc/html/rfc2781#section-2.1 + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: mbrtoc16.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $"); + +#include <assert.h> +#include <errno.h> +#include <stddef.h> +#include <uchar.h> + +#include "mbrtoc32.h" + +struct mbrtoc16state { + char16_t surrogate; /* pending low surrogate, 0 when none */ + mbstate_t mbs; /* state passed on to mbrtoc32 */ +}; +__CTASSERT(offsetof(struct mbrtoc16state, mbs) <= sizeof(mbstate_t)); +__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t) - + offsetof(struct mbrtoc16state, mbs)); +__CTASSERT(_Alignof(struct mbrtoc16state) <= _Alignof(mbstate_t)); + +size_t +mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, + mbstate_t *restrict ps) +{ + static mbstate_t psbuf; + struct mbrtoc16state *S; + char32_t c32; + size_t len; + + /* + * `If ps is a null pointer, each function uses its own + * internal mbstate_t object instead, which is initialized at + * program startup to the initial conversion state; the + * functions are not required to avoid data races with other + * calls to the same function in this case. The + * implementation behaves as if no library function calls + * these functions with a null pointer for ps.'
+ */ + if (ps == NULL) + ps = &psbuf; + + /* + * `If s is a null pointer, the mbrtoc16 function is equivalent + * to the call: + * + * mbrtoc16(NULL, "", 1, ps) + * + * In this case, the values of the parameters pc16 and n are + * ignored.' + */ + if (s == NULL) { + pc16 = NULL; + s = ""; + n = 1; + } + + /* + * Get the private conversion state. + */ + S = (struct mbrtoc16state *)ps; + + /* + * If there is a pending surrogate, yield it and consume no + * bytes of the input, returning (size_t)-3 to indicate that no + * bytes of input were consumed. + */ + if (S->surrogate >= 0xdc00 && S->surrogate <= 0xdfff) { + if (pc16) + *pc16 = S->surrogate; + S->surrogate = 0; + return (size_t)-3; + } + + /* + * Consume the next scalar value. If no full scalar value can + * be obtained, stop here. + */ + len = mbrtoc32(&c32, s, n, &S->mbs); + switch (len) { + case 0: /* NUL */ + if (pc16) + *pc16 = 0; + return 0; + case (size_t)-2: /* still incomplete after n bytes */ + case (size_t)-1: /* error */ + return len; + default: /* consumed len bytes of input */ + break; + } + + /* + * We consumed a scalar value from the input. + * + * If it's inside the Basic Multilingual Plane (16-bit scalar + * values), return it. + * + * If it's outside the Basic Multilingual Plane, split it into + * high and low surrogate code points, return the high, and + * save the low. + */ + if (c32 <= 0xffff) { + if (pc16) + *pc16 = c32; + _DIAGASSERT(S->surrogate == 0); + } else { + c32 -= 0x10000; /* RFC 2781 Sec. 2.1: work on U - 0x10000 */ + const char16_t w1 = 0xd800 | __SHIFTOUT(c32, __BITS(19,10)); /* high surrogate: bits 19..10 */ + const char16_t w2 = 0xdc00 | __SHIFTOUT(c32, __BITS(9,0)); /* low surrogate: bits 9..0 */ + if (pc16) + *pc16 = w1; + S->surrogate = w2; + _DIAGASSERT(S->surrogate != 0); + } + + /* + * Return the number of bytes consumed from the input.
+ */ + return len; +} Index: src/lib/libc/locale/mbrtoc32.3 diff -u /dev/null src/lib/libc/locale/mbrtoc32.3:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/mbrtoc32.3 Thu Aug 15 14:16:33 2024 @@ -0,0 +1,236 @@ +.\" $NetBSD: mbrtoc32.3,v 1.1 2024/08/15 14:16:33 riastradh Exp $ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE.
+.\" +.Dd August 14, 2024 +.Dt MBRTOC32 3 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm mbrtoc32 +.Nd Restartable multibyte to UTF-32 code unit conversion +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh LIBRARY +.Lb libc +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.In uchar.h +.Ft size_t +.Fn mbrtoc32 "char32_t * restrict pc32" \ +"const char * restrict s" \ +"size_t n" \ +"mbstate_t * restrict ps" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.Nm +function attempts to decode a multibyte character sequence at +.Fa s +of up to +.Fa n +bytes in the current locale, and yield the content as UTF-32 code +units, i.e., Unicode scalar values, via the output parameter +.Fa pc32 . +.Fa pc32 +may be null, in which case no output is stored. +.Bl -bullet +.It +If the multibyte sequence at +.Fa s +is invalid or an error occurs in decoding, +.Nm +returns +.Li (size_t)-1 +and sets +.Xr errno 2 +to indicate the error. +.It +If the multibyte sequence at +.Fa s +is still incomplete after +.Fa n +bytes, including any previously processed input saved in +.Fa ps , +.Nm +saves its state in +.Fa ps +after all the input so far and returns +.Li "(size_t)-2". +.It +If +.Nm +finds the null scalar value at +.Fa s , +then it stores zero at +.Li * Ns Fa pc32 +and returns zero. +.It +If +.Nm +finds a nonnull scalar value, then it stores the scalar value at +.Li * Ns Fa pc32 , +and returns the number of bytes it read from the input. +.El +.Pp +If +.Fa s +is a null pointer, the +.Nm +call is equivalent to: +.Bd -ragged -offset indent +.Fo mbrtoc32 +.Li NULL , +.Li \*q\*q , +.Li 1 , +.Fa ps +.Fc +.Ed +.Pp +This always returns zero, and has the effect of resetting +.Fa ps +to the initial conversion state, without writing to +.Fa pc32 , +even if it is nonnull. 
+.Pp +If +.Fa ps +is a null pointer, +.Nm +uses an internal +.Vt mbstate_t +object with static storage duration, distinct from all other +.Vt mbstate_t +objects (including those used by +.Xr mbrtoc16 3 , +.Xr c16rtomb 3 , +and +.Xr c32rtomb 3 ) , +which is initialized at program startup to the initial conversion +state. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh RETURN VALUES +The +.Nm +function returns: +.Bl -tag -width ".Li (size_t)-3" -offset indent +.It Li 0 +[null] +if within the next +.Fa n +bytes at +.Fa s +the first multibyte character is null. +.It Fa i +[scalar value] +where +.Li 0 +\*(Le +.Fa i +\*(Le +.Fa n , +if within the first +.Fa i +bytes at +.Fa s +a Unicode scalar value was decoded. +.It Li (size_t)-2 +[incomplete] +if within the first +.Fa n +bytes at +.Fa s +no complete Unicode scalar value could be decoded. +.It Li (size_t)-1 +[error] +if any encoding error was detected; +.Xr errno 2 +is set to reflect the error. +.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh EXAMPLES +.Bd -literal +char *s = ...; +size_t n = ...; +mbstate_t mbs = {0}; /* initial conversion state */ + +while (n) { + char32_t c32; + size_t len; + + len = mbrtoc32(&c32, s, n, &mbs); + switch (len) { + case 0: /* null terminator */ + assert(c32 == L'\e0'); + goto out; + default: /* scalar value */ + printf("U+%04"PRIx32"\en", (uint32_t)c32); + break; + case (size_t)-2: /* incomplete */ + printf("incomplete\en"); + goto readmore; + case (size_t)-1: /* error */ + printf("error: %d\en", errno); + goto out; + } + s += len; + n -= len; +} +.Ed +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh ERRORS +.Bl -tag -width ".Bq Er EILSEQ" +.It Bq Er EILSEQ +The multibyte sequence decodes to a surrogate code point. +.It Bq Er EILSEQ +The multibyte sequence cannot be decoded as a Unicode scalar +value.
+.It Bq Er EIO +An error occurred in loading the locale's character conversions. +.El +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr c16rtomb 3 , +.Xr c32rtomb 3 , +.Xr mbrtoc16 3 , +.Xr uchar 3 +.Rs +.%B The Unicode Standard +.%O Version 15.0 \(em Core Specification +.%Q The Unicode Consortium +.%D September 2022 +.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh STANDARDS +The +.Nm +function conforms to +.St -isoC-2011 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +function first appeared in +.Nx 11.0 . Index: src/lib/libc/locale/mbrtoc32.c diff -u /dev/null src/lib/libc/locale/mbrtoc32.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/mbrtoc32.c Thu Aug 15 14:16:33 2024 @@ -0,0 +1,237 @@ +/* $NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * mbrtoc32(&c32, s, n, ps) + * + * Decode a Unicode UTF-32 code unit from up to n bytes out of the + * multibyte string s, and store it at c32, using multibyte + * encoding state ps. A UTF-32 code unit is also a Unicode scalar + * value, which is any Unicode code point except a surrogate. + * + * Return the number of bytes consumed on success, or 0 if the + * code unit is NUL, or (size_t)-2 if the input is incomplete, or + * (size_t)-1 on error with errno set to EILSEQ. + * + * In the case of incomplete input, the decoding state so far + * after processing s[0], s[1], ..., s[n - 1] is saved in ps, so + * subsequent calls to mbrtoc32 will pick up n bytes later into + * the input stream. + * + * References: + * + * The Unicode Standard, Version 15.0 -- Core Specification, The + * Unicode Consortium, Sec. 3.8 `Surrogates', p. 119. 
+ * https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf + * https://web.archive.org/web/20240718101254/https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: mbrtoc32.c,v 1.1 2024/08/15 14:16:33 riastradh Exp $"); + +#include <sys/param.h> /* MIN */ +#include <sys/types.h> /* broken citrus_*.h */ +#include <sys/queue.h> /* broken citrus_*.h */ + +#include <assert.h> +#include <errno.h> +#include <langinfo.h> +#include <limits.h> +#include <paths.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <uchar.h> +#include <wchar.h> + +#include "citrus_types.h" /* broken citrus_iconv.h */ +#include "citrus_module.h" /* broken citrus_iconv.h */ +#include "citrus_hash.h" /* broken citrus_iconv.h */ +#include "citrus_iconv.h" + +#include "mbrtoc32.h" + +__CTASSERT(sizeof(struct mbrtoc32state) <= sizeof(mbstate_t)); /* private state must fit in the mbstate_t it is cast from */ +__CTASSERT(_Alignof(struct mbrtoc32state) <= _Alignof(mbstate_t)); + +size_t +mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, + mbstate_t *restrict ps) +{ + static mbstate_t psbuf; + struct mbrtoc32state *S; + struct _citrus_iconv *iconv = NULL; + size_t len; + char32_t c32; + int error, errno_save; + + /* + * Save errno in case _citrus_iconv_* clobbers it. It is restored on the success path. + */ + errno_save = errno; + + /* + * `If ps is a null pointer, each function uses its own + * internal mbstate_t object instead, which is initialized at + * program startup to the initial conversion state; the + * functions are not required to avoid data races with other + * calls to the same function in this case. The + * implementation behaves as if no library function calls + * these functions with a null pointer for ps.' + */ + if (ps == NULL) + ps = &psbuf; + + /* + * `If s is a null pointer, the mbrtoc32 function is equivalent + * to the call: + * + * mbrtoc32(NULL, "", 1, ps) + * + * In this case, the values of the parameters pc32 and n are + * ignored.'
+ */ + if (s == NULL) { + pc32 = NULL; + s = ""; + n = 1; + } + + /* + * Get the private conversion state. + */ + S = (struct mbrtoc32state *)ps; + + /* + * If input length is zero, the result is always incomplete by + * definition. Don't bother with iconv -- we'd have to + * disentangle truncated outputs. + */ + if (n == 0) { + len = (size_t)-2; + goto out; + } + + /* + * Reset the destination buffer if this is the initial state. + */ + if (S->dstleft == 0) + S->dstleft = sizeof(S->dstbuf); + + /* + * Open an iconv handle to convert locale-dependent multibyte + * input to UTF-32LE. + */ + if ((error = _citrus_iconv_open(&iconv, _PATH_ICONV, + nl_langinfo(CODESET), "utf-32le")) != 0) { + errno = EIO; /* XXX? the value in error is not propagated */ + len = (size_t)-1; + goto out; + } + + /* + * Try to iconv a minimal prefix. If we succeed, set len to + * the length consumed and goto ok. + */ + for (len = 0; len < MIN(n, sizeof(S->srcbuf) - S->nsrc);) { /* NOTE(review): bound shrinks as S->nsrc grows -- confirm len can reach n */ + const char *src = S->srcbuf; + size_t srcleft; + char *dst = S->dstbuf + sizeof(S->dstbuf) - S->dstleft; + size_t inval; + + S->srcbuf[S->nsrc++] = s[len++]; /* buffer one more input byte */ + srcleft = S->nsrc; + + error = _citrus_iconv_convert(iconv, + &src, &srcleft, + &dst, &S->dstleft, + _CITRUS_ICONV_F_HIDE_INVALID, &inval); + if (error != EINVAL) { /* EINVAL is treated as: need more input */ + if (error == 0) + goto ok; + errno = error; + len = (size_t)-1; + goto out; + } + } + + /* + * Incomplete. Return (size_t)-2 and let the caller try again. + * We have consumed all n bytes at this point without finding a + * complete code point. + */ + len = (size_t)-2; + goto out; + +ok: /* + * Successfully converted a minimal byte sequence, which should + * produce exactly one UTF-32 code unit, encoded in + * little-endian, representing a code point. Get the code + * point. + */ + c32 = le32dec(S->dstbuf); + + /* + * Reject surrogate code points. We only deal in scalar + * values. + * + * XXX Is this necessary? Won't iconv take care of it for us?
+ */ + if (c32 >= 0xd800 && c32 <= 0xdfff) { + errno = EILSEQ; + len = (size_t)-1; + goto out; + } + + /* + * Non-surrogate code point -- scalar value. Yield it. + */ + if (pc32) + *pc32 = c32; + + /* + * If we got the null scalar value, return zero length, as the + * contract requires. + */ + if (c32 == 0) + len = 0; + + /* + * Make sure we preserve errno on success. + */ + errno = errno_save; + +out: if (len != (size_t)-2) { /* any result but incomplete: drop the buffered state */ + S->nsrc = 0; /* redundant with the memset on the next line */ + memset(S, 0, sizeof(*S)); /* paranoia */ + } + errno_save = errno; /* shield errno from the close that follows */ + _citrus_iconv_close(iconv); /* iconv is still NULL when we got here via n == 0 */ + errno = errno_save; + return len; +} Index: src/lib/libc/locale/mbrtoc32.h diff -u /dev/null src/lib/libc/locale/mbrtoc32.h:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/lib/libc/locale/mbrtoc32.h Thu Aug 15 14:16:33 2024 @@ -0,0 +1,42 @@ +/* $NetBSD: mbrtoc32.h,v 1.1 2024/08/15 14:16:33 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIB_LIBC_LOCALE_MBRTOC32_H_ +#define LIB_LIBC_LOCALE_MBRTOC32_H_ + +#include <limits.h> +#include <uchar.h> + +struct mbrtoc32state { + char srcbuf[MB_LEN_MAX]; + size_t nsrc; + char dstbuf[4]; + size_t dstleft; +}; + +#endif /* LIB_LIBC_LOCALE_MBRTOC32_H_ */ Index: src/share/man/man3/uchar.3 diff -u /dev/null src/share/man/man3/uchar.3:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/share/man/man3/uchar.3 Thu Aug 15 14:16:34 2024 @@ -0,0 +1,131 @@ +.\" $NetBSD: uchar.3,v 1.1 2024/08/15 14:16:34 riastradh Exp $ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd August 14, 2024 +.Dt UCHAR 3 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm uchar +.Nd Unicode utilities +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.In uchar.h +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +The +.In uchar.h +header file declares types and functions for manipulating Unicode code +units. +.\"""""""""""""""""""""""""""""""""""""" +.Ss Types +.Bl -tag -width ".Vt char32_t" +.It Vt char16_t +Unsigned integer type for UTF-16 code units. +.Pp +Same type as +.Vt uint_least16_t +from +.In stdint.h . +May represent both surrogate code points, i.e., code points in the +interval [0xd800,0xdfff], and Unicode scalar values in the Basic +Multilingual Plane, which are the 16-bit code points other than +surrogate code points. +.It Vt char32_t +Unsigned integer type for UTF-32 code units. +.Pp +Same type as +.Vt uint_least32_t +from +.In stdint.h . +Can represent all Unicode scalar values, not just those in the Basic +Multilingual Plane. +Intended to represent only Unicode scalar values, not surrogate code +points. +.It Vt mbstate_t +Opaque multibyte conversion state. +.Pp +Same type as in +.In stddef.h +and +.In wchar.h . +.It Vt size_t +Unsigned integer type to represent array sizes. +.Pp +Same type as in +.In stddef.h , +.In stdint.h , +and +.In sys/types.h . 
+.El +.\"""""""""""""""""""""""""""""""""""""" +.Ss Functions +The +.In uchar.h +header file declares the functions +.Xr mbrtoc16 3 , +.Xr c16rtomb 3 , +.Xr mbrtoc32 3 , +and +.Xr c32rtomb 3 +for conversion between multibyte sequences and UTF-16/UTF-32 code +units. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Xr c16rtomb 3 , +.Xr c32rtomb 3 , +.Xr mbrtoc16 3 , +.Xr mbrtoc32 3 +.Rs +.%B The Unicode Standard +.%O Version 15.0 \(em Core Specification +.%Q The Unicode Consortium +.%D September 2022 +.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf +.Re +.Rs +.%A P. Hoffman +.%A F. Yergeau +.%T UTF-16, an encoding of ISO 10646 +.%R RFC 2781 +.%D February 2000 +.%I Internet Engineering Task Force +.%U https://datatracker.ietf.org/doc/html/rfc2781 +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh STANDARDS +The +.In uchar.h +header file conforms to +.St -isoC-2011 +and +.St -p1003.1-2024 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.In uchar.h +header file first appeared in +.Nx 11.0 . Index: src/tests/lib/libc/locale/t_c16rtomb.c diff -u /dev/null src/tests/lib/libc/locale/t_c16rtomb.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/tests/lib/libc/locale/t_c16rtomb.c Thu Aug 15 14:16:34 2024 @@ -0,0 +1,187 @@ +/* $NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */ + +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Copyright (c) 2013 Ed Schouten <e...@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Test program for c16rtomb() as specified by ISO/IEC 9899:2011. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: t_c16rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $"); + +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <string.h> +#include <uchar.h> + +#include <atf-c.h> + +static void +require_lc_ctype(const char *locale_name) +{ + char *lc_ctype_set; + + lc_ctype_set = setlocale(LC_CTYPE, locale_name); + if (lc_ctype_set == NULL) + atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d", + locale_name, errno); + + ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0, + "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name); +} + +static mbstate_t s; +static char buf[MB_LEN_MAX + 1]; + +ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test); +ATF_TC_BODY(c16rtomb_c_locale_test, tc) +{ + size_t n; + + require_lc_ctype("C"); + + /* + * If the buffer argument is NULL, c16 is implicitly 0, + * c16rtomb() resets its internal state. 
+ */ + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n); + + /* Null wide character. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 0 && + (unsigned char)buf[1] == 0xcc), + "buf=[%02x %02x]", buf[0], buf[1]); + + /* Latin letter A, internal state. */ + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n); + + /* Latin letter A. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), 1, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' && + (unsigned char)buf[1] == 0xcc), + "buf=[%02x %02x]", buf[0], buf[1]); + + /* Unicode character 'Pile of poo'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1, + "n=%zu", n); + ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); + ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); +} + +ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test); +ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.ISO8859-1"); + + /* Unicode character 'Euro sign'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), (size_t)-1, + "n=%zu", n); + ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); + ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); +} + +ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_15_test); +ATF_TC_BODY(c16rtomb_iso_8859_15_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.ISO8859-15"); + + /* Unicode character 'Euro sign'. 
*/ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0x20ac, &s)), 1, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 0xa4 && + (unsigned char)buf[1] == 0xcc), + "buf=[%02x %02x]", buf[0], buf[1]); +} + +ATF_TC_WITHOUT_HEAD(c16rtomb_utf_8_test); +ATF_TC_BODY(c16rtomb_utf_8_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.UTF-8"); + + /* Unicode character 'Pile of poo'. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), 4, "n=%zu", n); + ATF_CHECK_MSG(((unsigned char)buf[0] == 0xf0 && + (unsigned char)buf[1] == 0x9f && + (unsigned char)buf[2] == 0x92 && + (unsigned char)buf[3] == 0xa9 && + (unsigned char)buf[4] == 0xcc), + "buf=[%02x %02x %02x %02x %02x]", + buf[0], buf[1], buf[2], buf[3], buf[4]); + + /* Invalid code; 'Pile of poo' without the trail surrogate. */ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xd83d, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, L'A', &s)), (size_t)-1, + "n=%zu", n); + ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); + ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); + + /* Invalid code; 'Pile of poo' without the lead surrogate. 
*/ + memset(&s, 0, sizeof(s)); + memset(buf, 0xcc, sizeof(buf)); + ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0xdca9, &s)), (size_t)-1, + "n=%zu", n); + ATF_CHECK_EQ_MSG(errno, EILSEQ, "errno=%d", errno); + ATF_CHECK_EQ_MSG((unsigned char)buf[0], 0xcc, "buf=[%02x]", buf[0]); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test); + ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test); + ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test); + ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test); + + return (atf_no_error()); +} Index: src/tests/lib/libc/locale/t_c32rtomb.c diff -u /dev/null src/tests/lib/libc/locale/t_c32rtomb.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/tests/lib/libc/locale/t_c32rtomb.c Thu Aug 15 14:16:34 2024 @@ -0,0 +1,60 @@ +/* $NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: t_c32rtomb.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $"); + +#include <atf-c.h> +#include <locale.h> +#include <uchar.h> + +#include "h_macros.h" + +ATF_TC(c32rtomb_null); +ATF_TC_HEAD(c32rtomb_null, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test null string output to c32rtomb"); +} +ATF_TC_BODY(c32rtomb_null, tc) +{ + char *locale; + mbstate_t ps = {0}; + size_t n; + + REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL); + ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale); + + ATF_CHECK_EQ_MSG((n = c32rtomb(NULL, L'x', &ps)), 1, "n=%zu", n); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, c32rtomb_null); + return atf_no_error(); +} Index: src/tests/lib/libc/locale/t_mbrtoc16.c diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc16.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/tests/lib/libc/locale/t_mbrtoc16.c Thu Aug 15 14:16:34 2024 @@ -0,0 +1,241 @@ +/* $NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */ + +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Copyright (c) 2013 Ed Schouten <e...@freebsd.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Test program for mbrtoc16() as specified by ISO/IEC 9899:2011. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: t_mbrtoc16.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $"); + +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <locale.h> +#include <string.h> +#include <uchar.h> + +#include <atf-c.h> + +static void +require_lc_ctype(const char *locale_name) +{ + char *lc_ctype_set; + + lc_ctype_set = setlocale(LC_CTYPE, locale_name); + if (lc_ctype_set == NULL) + atf_tc_fail("setlocale(LC_CTYPE, \"%s\") failed; errno=%d", + locale_name, errno); + + ATF_REQUIRE_EQ_MSG(strcmp(lc_ctype_set, locale_name), 0, + "lc_ctype_set=%s locale_name=%s", lc_ctype_set, locale_name); +} + +static mbstate_t s; +static char16_t c16; + +ATF_TC_WITHOUT_HEAD(mbrtoc16_c_locale_test); +ATF_TC_BODY(mbrtoc16_c_locale_test, tc) +{ + size_t n; + + require_lc_ctype("C"); + + /* Null wide character, internal state. 
*/ + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Null wide character. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Latin letter A, internal state. */ + ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'A'); + + /* Latin letter A. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'A'); + + /* Incomplete character sequence. */ + c16 = L'z'; + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'z'); + + /* Check that mbrtoc16() doesn't access the buffer when n == 0. */ + c16 = L'z'; + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'z'); + + /* Check that mbrtoc16() doesn't read ahead too aggressively. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "AB", 2, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'A'); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "C", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'C', "c16=U+%"PRIx16" L'C'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'C'); + +} + +ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_1_test); +ATF_TC_BODY(mbrtoc16_iso_8859_1_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.ISO8859-1"); + + /* Currency sign. 
*/ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xa4, "c16=U+%"PRIx16, (uint16_t)c16); +} + +ATF_TC_WITHOUT_HEAD(mbrtoc16_iso_8859_15_test); +ATF_TC_BODY(mbrtoc16_iso_8859_15_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.ISO8859-15"); + + /* Euro sign. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xa4", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0x20ac, "c16=U+%"PRIx16, (uint16_t)c16); +} + +ATF_TC_WITHOUT_HEAD(mbrtoc16_utf_8_test); +ATF_TC_BODY(mbrtoc16_utf_8_test, tc) +{ + size_t n; + + require_lc_ctype("en_US.UTF-8"); + + /* Null wide character, internal state. */ + ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, NULL)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Null wide character. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 1, &s)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Latin letter A, internal state. */ + ATF_CHECK_EQ_MSG((n = mbrtoc16(NULL, 0, 0, NULL)), 0, "n=%zu", n); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, NULL)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'A'); + + /* Latin letter A. */ + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "A", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'A', "c16=U+%"PRIx16" L'A'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'A'); + + /* Incomplete character sequence (zero length). */ + c16 = L'z'; + memset(&s, 0, sizeof(s)); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, L'z', "c16=U+%"PRIx16" L'z'=U+%"PRIx16, + (uint16_t)c16, (uint16_t)L'z'); + + /* Incomplete character sequence (truncated double-byte). 
*/ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2, + "n=%zu", n); + + /* Same as above, but complete. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\x84", 2, &s)), 2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xc4, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Test restarting behaviour. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3", 1, &s)), (size_t)-2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0, "c16=U+%"PRIx16, (uint16_t)c16); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xb7", 1, &s)), 1, "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xf7, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Surrogate pair. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xf0\x9f\x92\xa9", 4, &s)), 4, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xd83d, "c16=U+%"PRIx16, (uint16_t)c16); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "", 0, &s)), (size_t)-3, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xdca9, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Letter e with acute, precomposed. */ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xc3\xa9", 2, &s)), 2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0xe9, "c16=U+%"PRIx16, (uint16_t)c16); + + /* Letter e with acute, combined. 
*/ + memset(&s, 0, sizeof(s)); + c16 = 0; + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\x65\xcc\x81", 3, &s)), 1, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0x65, "c16=U+%"PRIx16, (uint16_t)c16); + ATF_CHECK_EQ_MSG((n = mbrtoc16(&c16, "\xcc\x81", 2, &s)), 2, + "n=%zu", n); + ATF_CHECK_EQ_MSG(c16, 0x301, "c16=U+%"PRIx16, (uint16_t)c16); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, mbrtoc16_c_locale_test); + ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_1_test); + ATF_TP_ADD_TC(tp, mbrtoc16_iso_8859_15_test); + ATF_TP_ADD_TC(tp, mbrtoc16_utf_8_test); + + return (atf_no_error()); +} Index: src/tests/lib/libc/locale/t_mbrtoc32.c diff -u /dev/null src/tests/lib/libc/locale/t_mbrtoc32.c:1.1 --- /dev/null Thu Aug 15 14:16:34 2024 +++ src/tests/lib/libc/locale/t_mbrtoc32.c Thu Aug 15 14:16:34 2024 @@ -0,0 +1,61 @@ +/* $NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__RCSID("$NetBSD: t_mbrtoc32.c,v 1.1 2024/08/15 14:16:34 riastradh Exp $"); + +#include <atf-c.h> +#include <locale.h> +#include <uchar.h> + +#include "h_macros.h" + +ATF_TC(mbrtoc32_null); +ATF_TC_HEAD(mbrtoc32_null, tc) +{ + atf_tc_set_md_var(tc, "descr", "Test null string input to mbrtoc32"); +} +ATF_TC_BODY(mbrtoc32_null, tc) +{ + char *locale; + char32_t c32; + mbstate_t ps = {0}; + size_t n; + + REQUIRE_LIBC((locale = setlocale(LC_ALL, "C")), NULL); + ATF_REQUIRE_EQ_MSG(strcmp(locale, "C"), 0, "locale=%s", locale); + + ATF_CHECK_EQ_MSG((n = mbrtoc32(&c32, NULL, 0, &ps)), 0, "n=%zu", n); +} + +ATF_TP_ADD_TCS(tp) +{ + + ATF_TP_ADD_TC(tp, mbrtoc32_null); + return atf_no_error(); +}