Module Name:    src
Committed By:   riastradh
Date:           Sun Aug 18 04:51:16 UTC 2024

Modified Files:
        src/tests/lib/libc/locale: t_c16rtomb.c t_c8rtomb.c

Log Message:
c8rtomb(3), c16rtomb(3), c32rtomb(3): Test stateful shift sequences.

PR lib/58612: c8rtomb/c16rtomb/c32rtomb yield suboptimal shift
sequences


To generate a diff of this commit:
cvs rdiff -u -r1.3 -r1.4 src/tests/lib/libc/locale/t_c16rtomb.c \
    src/tests/lib/libc/locale/t_c8rtomb.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/tests/lib/libc/locale/t_c16rtomb.c
diff -u src/tests/lib/libc/locale/t_c16rtomb.c:1.3 src/tests/lib/libc/locale/t_c16rtomb.c:1.4
--- src/tests/lib/libc/locale/t_c16rtomb.c:1.3	Sun Aug 18 02:19:35 2024
+++ src/tests/lib/libc/locale/t_c16rtomb.c	Sun Aug 18 04:51:16 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $	*/
+/*	$NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2002 Tim J. Robbins
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: t_c16rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $");
+__RCSID("$NetBSD: t_c16rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $");
 
 #include <errno.h>
 #include <limits.h>
@@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name
 }
 
 static mbstate_t s;
-static char buf[MB_LEN_MAX + 1];
+static char buf[7*MB_LEN_MAX + 1];
 
 ATF_TC_WITHOUT_HEAD(c16rtomb_c_locale_test);
 ATF_TC_BODY(c16rtomb_c_locale_test, tc)
@@ -114,6 +114,88 @@ ATF_TC_BODY(c16rtomb_c_locale_test, tc)
 	    "buf=[%02x %02x]", buf[0], buf[1]);
 }
 
+ATF_TC_WITHOUT_HEAD(c16rtomb_iso2022jp_locale_test);
+ATF_TC_BODY(c16rtomb_iso2022jp_locale_test, tc)
+{
+	char *p;
+	size_t n;
+
+	require_lc_ctype("ja_JP.ISO-2022-JP");
+
+	/*
+	 * If the buffer argument is NULL, c16 is implicitly 0 and
+	 * c16rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, 0xdc00, NULL)), 1, "n=%zu", n);
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c16rtomb(buf, 0, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c16rtomb(NULL, L'A', NULL)), 1, "n=%zu", n);
+
+	/*
+	 * 1. U+0041 LATIN CAPITAL LETTER A
+	 * 2. U+00A5 YEN SIGN
+	 * 3. U+00A5 YEN SIGN (again, no shift needed)
+	 * 4. U+30A2 KATAKANA LETTER A
+	 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
+	 * 6. incomplete UTF-16 surrogate pair -- no output
+	 * 7. U+0000 NUL (plus shift sequence to initial state)
+	 */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	p = buf;
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'A', &s)), 1, "n=%zu", n); /* 1 */
+	p += 1;
+	atf_tc_expect_fail("PR lib/58612:"
+	    " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences");
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 4, "n=%zu", n); /* 2 */
+	p += 4;
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xa5, &s)), 1, "n=%zu", n); /* 3 */
+	p += 1;
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 5, "n=%zu", n); /* 4 */
+	p += 5;
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0x30a2, &s)), 2, "n=%zu", n); /* 5 */
+	p += 2;
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, 0xd800, &s)), 0, "n=%zu", n); /* 6 */
+	ATF_CHECK_EQ_MSG((n = c16rtomb(p, L'\0', &s)), 4, "n=%zu", n); /* 7 */
+	p += 4;
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+		(unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
+		(unsigned char)buf[2] == '(' &&
+		(unsigned char)buf[3] == 'J' &&
+		(unsigned char)buf[4] == 0x5c && /* YEN SIGN */
+		(unsigned char)buf[5] == 0x5c && /* YEN SIGN */
+		(unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */
+		(unsigned char)buf[7] == '$' &&
+		(unsigned char)buf[8] == 'B' &&
+		(unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
+		(unsigned char)buf[10] == 0x22 &&
+		(unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
+		(unsigned char)buf[12] == 0x22 &&
+		(unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
+		(unsigned char)buf[14] == '(' &&
+		(unsigned char)buf[15] == 'B' &&
+		(unsigned char)buf[16] == '\0' &&
+		(unsigned char)buf[17] == 0xcc),
+	    "buf=[%02x %02x %02x %02x  %02x %02x %02x %02x "
+	    " %02x %02x %02x %02x  %02x %02x %02x %02x "
+	    " %02x %02x]",
+	    buf[0], buf[1], buf[2], buf[3],
+	    buf[4], buf[5], buf[6], buf[7],
+	    buf[8], buf[9], buf[10], buf[11],
+	    buf[12], buf[13], buf[14], buf[15],
+	    buf[16], buf[17]);
+}
+
 ATF_TC_WITHOUT_HEAD(c16rtomb_iso_8859_1_test);
 ATF_TC_BODY(c16rtomb_iso_8859_1_test, tc)
 {
@@ -198,6 +280,7 @@ ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, c16rtomb_c_locale_test);
+	ATF_TP_ADD_TC(tp, c16rtomb_iso2022jp_locale_test);
 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_1_test);
 	ATF_TP_ADD_TC(tp, c16rtomb_iso_8859_15_test);
 	ATF_TP_ADD_TC(tp, c16rtomb_utf_8_test);
Index: src/tests/lib/libc/locale/t_c8rtomb.c
diff -u src/tests/lib/libc/locale/t_c8rtomb.c:1.3 src/tests/lib/libc/locale/t_c8rtomb.c:1.4
--- src/tests/lib/libc/locale/t_c8rtomb.c:1.3	Sun Aug 18 02:19:35 2024
+++ src/tests/lib/libc/locale/t_c8rtomb.c	Sun Aug 18 04:51:16 2024
@@ -1,4 +1,4 @@
-/*	$NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $	*/
+/*	$NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2002 Tim J. Robbins
@@ -33,7 +33,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: t_c8rtomb.c,v 1.3 2024/08/18 02:19:35 riastradh Exp $");
+__RCSID("$NetBSD: t_c8rtomb.c,v 1.4 2024/08/18 04:51:16 riastradh Exp $");
 
 #include <errno.h>
 #include <limits.h>
@@ -59,7 +59,7 @@ require_lc_ctype(const char *locale_name
 }
 
 static mbstate_t s;
-static char buf[MB_LEN_MAX + 1];
+static char buf[7*MB_LEN_MAX + 1];
 
 ATF_TC_WITHOUT_HEAD(c8rtomb_c_locale_test);
 ATF_TC_BODY(c8rtomb_c_locale_test, tc)
@@ -142,6 +142,102 @@ ATF_TC_BODY(c8rtomb_c_locale_test, tc)
 	    "buf=[%02x %02x]", buf[0], buf[1]);
 }
 
+ATF_TC_WITHOUT_HEAD(c8rtomb_iso2022jp_locale_test);
+ATF_TC_BODY(c8rtomb_iso2022jp_locale_test, tc)
+{
+	char *p;
+	size_t n;
+
+	require_lc_ctype("ja_JP.ISO-2022-JP");
+
+	/*
+	 * If the buffer argument is NULL, c8 is implicitly 0 and
+	 * c8rtomb() resets its internal state.
+	 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0x80, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xc0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xe0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf0, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xf8, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfc, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xfe, NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 0xff, NULL)), 1, "n=%zu", n);
+
+	/* Null wide character. */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	ATF_CHECK_EQ_MSG((n = c8rtomb(buf, 0, &s)), 1, "n=%zu", n);
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 0 &&
+		(unsigned char)buf[1] == 0xcc),
+	    "buf=[%02x %02x]", buf[0], buf[1]);
+
+	/* Latin letter A, internal state. */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, '\0', NULL)), 1, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(NULL, 'A', NULL)), 1, "n=%zu", n);
+
+	/*
+	 * 1. U+0041 LATIN CAPITAL LETTER A
+	 * 2. U+00A5 YEN SIGN
+	 * 3. U+00A5 YEN SIGN (again, no shift needed)
+	 * 4. U+30A2 KATAKANA LETTER A
+	 * 5. U+30A2 KATAKANA LETTER A (again, no shift needed)
+	 * 6. incomplete UTF-8 multibyte sequence -- no output
+	 * 7. U+0000 NUL (plus shift sequence to initial state)
+	 */
+	memset(&s, 0, sizeof(s));
+	memset(buf, 0xcc, sizeof(buf));
+	p = buf;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 'A', &s)), 1, "n=%zu", n); /* 1 */
+	p += 1;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 2 */
+	atf_tc_expect_fail("PR lib/58612:"
+	    " c8rtomb/c16rtomb/c32rtomb yield suboptimal shift sequences");
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 4, "n=%zu", n);
+	p += 4;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xc2, &s)), 0, "n=%zu", n); /* 3 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa5, &s)), 1, "n=%zu", n);
+	p += 1;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 4 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 5, "n=%zu", n);
+	p += 5;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 5 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xa2, &s)), 2, "n=%zu", n);
+	p += 2;
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0xe3, &s)), 0, "n=%zu", n); /* 6 */
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, 0x82, &s)), 0, "n=%zu", n);
+	ATF_CHECK_EQ_MSG((n = c8rtomb(p, '\0', &s)), 4, "n=%zu", n); /* 7 */
+	p += 4;
+	ATF_CHECK_MSG(((unsigned char)buf[0] == 'A' &&
+		(unsigned char)buf[1] == 0x1b && /* shift ISO/IEC 646:JP */
+		(unsigned char)buf[2] == '(' &&
+		(unsigned char)buf[3] == 'J' &&
+		(unsigned char)buf[4] == 0x5c && /* YEN SIGN */
+		(unsigned char)buf[5] == 0x5c && /* YEN SIGN */
+		(unsigned char)buf[6] == 0x1b && /* shift JIS X 0208-1983 */
+		(unsigned char)buf[7] == '$' &&
+		(unsigned char)buf[8] == 'B' &&
+		(unsigned char)buf[9] == 0x25 && /* KATAKANA LETTER A */
+		(unsigned char)buf[10] == 0x22 &&
+		(unsigned char)buf[11] == 0x25 && /* KATAKANA LETTER A */
+		(unsigned char)buf[12] == 0x22 &&
+		(unsigned char)buf[13] == 0x1b && /* shift US-ASCII */
+		(unsigned char)buf[14] == '(' &&
+		(unsigned char)buf[15] == 'B' &&
+		(unsigned char)buf[16] == '\0' &&
+		(unsigned char)buf[17] == 0xcc),
+	    "buf=[%02x %02x %02x %02x  %02x %02x %02x %02x "
+	    " %02x %02x %02x %02x  %02x %02x %02x %02x "
+	    " %02x %02x]",
+	    buf[0], buf[1], buf[2], buf[3],
+	    buf[4], buf[5], buf[6], buf[7],
+	    buf[8], buf[9], buf[10], buf[11],
+	    buf[12], buf[13], buf[14], buf[15],
+	    buf[16], buf[17]);
+}
+
 ATF_TC_WITHOUT_HEAD(c8rtomb_iso_8859_1_test);
 ATF_TC_BODY(c8rtomb_iso_8859_1_test, tc)
 {
@@ -252,6 +348,7 @@ ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, c8rtomb_c_locale_test);
+	ATF_TP_ADD_TC(tp, c8rtomb_iso2022jp_locale_test);
 	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_1_test);
 	ATF_TP_ADD_TC(tp, c8rtomb_iso_8859_15_test);
 	ATF_TP_ADD_TC(tp, c8rtomb_utf_8_test);

Reply via email to